1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
28 #include "double-int.h"
35 #include "fold-const.h"
36 #include "stringpool.h"
39 #include "stor-layout.h"
43 #include "hard-reg-set.h"
44 #include "insn-config.h"
45 #include "conditions.h"
47 #include "insn-codes.h"
48 #include "insn-attr.h"
54 #include "statistics.h"
56 #include "fixed-value.h"
64 #include "diagnostic-core.h"
67 #include "dominance.h"
73 #include "cfgcleanup.h"
74 #include "basic-block.h"
77 #include "target-def.h"
78 #include "common/common-target.h"
79 #include "langhooks.h"
83 #include "plugin-api.h"
86 #include "hash-table.h"
87 #include "tree-ssa-alias.h"
88 #include "internal-fn.h"
89 #include "gimple-fold.h"
91 #include "gimple-expr.h"
97 #include "tm-constrs.h"
101 #include "sched-int.h"
105 #include "diagnostic.h"
106 #include "dumpfile.h"
107 #include "tree-pass.h"
109 #include "pass_manager.h"
110 #include "target-globals.h"
111 #include "tree-vectorizer.h"
112 #include "shrink-wrap.h"
113 #include "builtins.h"
114 #include "rtl-iter.h"
115 #include "tree-iterator.h"
116 #include "tree-chkp.h"
117 #include "rtl-chkp.h"
/* Forward declarations for the PE/COFF (Windows) symbol-legitimization
   helpers defined later in this file.  NOTE(review): the meaning of the
   bool argument is not visible in this chunk -- confirm against the
   definitions.  */
static rtx legitimize_dllimport_symbol (rtx, bool);
static rtx legitimize_pe_coff_extern_decl (rtx, bool);
static rtx legitimize_pe_coff_symbol (rtx, bool);
/* Fallback value used when the target headers do not define
   CHECK_STACK_LIMIT themselves.  NOTE(review): the matching #endif is
   not visible in this chunk -- confirm it appears directly below.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
/* Return index of given mode in mult and division cost tables:
   QImode -> 0, HImode -> 1, SImode -> 2, DImode -> 3.
   NOTE(review): the final alternative of this conditional chain (the
   index used for any other mode) continues past the lines visible in
   this chunk -- confirm before relying on it.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.
   Scaling byte counts by 2 therefore puts size costs on the same scale
   as the cycle costs produced by COSTS_N_INSNS, so the two kinds of
   tables are directly comparable.  */
#define COSTS_N_BYTES(N) ((N) * 2)
139 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
/* String operation strategies when optimizing for size: use the
   rep_prefix_1_byte algorithm for every block size (-1 = no upper
   bound), in both table entries.  */
static stringop_algs ix86_size_memcpy[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
static stringop_algs ix86_size_memset[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
149 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
150 COSTS_N_BYTES (2), /* cost of an add instruction */
151 COSTS_N_BYTES (3), /* cost of a lea instruction */
152 COSTS_N_BYTES (2), /* variable shift costs */
153 COSTS_N_BYTES (3), /* constant shift costs */
154 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
155 COSTS_N_BYTES (3), /* HI */
156 COSTS_N_BYTES (3), /* SI */
157 COSTS_N_BYTES (3), /* DI */
158 COSTS_N_BYTES (5)}, /* other */
159 0, /* cost of multiply per each bit set */
160 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
161 COSTS_N_BYTES (3), /* HI */
162 COSTS_N_BYTES (3), /* SI */
163 COSTS_N_BYTES (3), /* DI */
164 COSTS_N_BYTES (5)}, /* other */
165 COSTS_N_BYTES (3), /* cost of movsx */
166 COSTS_N_BYTES (3), /* cost of movzx */
167 0, /* "large" insn */
169 2, /* cost for loading QImode using movzbl */
170 {2, 2, 2}, /* cost of loading integer registers
171 in QImode, HImode and SImode.
172 Relative to reg-reg move (2). */
173 {2, 2, 2}, /* cost of storing integer registers */
174 2, /* cost of reg,reg fld/fst */
175 {2, 2, 2}, /* cost of loading fp registers
176 in SFmode, DFmode and XFmode */
177 {2, 2, 2}, /* cost of storing fp registers
178 in SFmode, DFmode and XFmode */
179 3, /* cost of moving MMX register */
180 {3, 3}, /* cost of loading MMX registers
181 in SImode and DImode */
182 {3, 3}, /* cost of storing MMX registers
183 in SImode and DImode */
184 3, /* cost of moving SSE register */
185 {3, 3, 3}, /* cost of loading SSE registers
186 in SImode, DImode and TImode */
187 {3, 3, 3}, /* cost of storing SSE registers
188 in SImode, DImode and TImode */
189 3, /* MMX or SSE register to integer */
190 0, /* size of l1 cache */
191 0, /* size of l2 cache */
192 0, /* size of prefetch block */
193 0, /* number of parallel prefetches */
195 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
196 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
197 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
198 COSTS_N_BYTES (2), /* cost of FABS instruction. */
199 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
200 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
203 1, /* scalar_stmt_cost. */
204 1, /* scalar load_cost. */
205 1, /* scalar_store_cost. */
206 1, /* vec_stmt_cost. */
207 1, /* vec_to_scalar_cost. */
208 1, /* scalar_to_vec_cost. */
209 1, /* vec_align_load_cost. */
210 1, /* vec_unalign_load_cost. */
211 1, /* vec_store_cost. */
212 1, /* cond_taken_branch_cost. */
213 1, /* cond_not_taken_branch_cost. */
216 /* Processor costs (relative to an add) */
/* 386 string operations: rep_prefix_1_byte for every block size.  The
   second entry is a dummy (libcall only); it is presumably the 64-bit
   column, which a 386 can never use -- confirm against the consumers
   of these tables.  */
static stringop_algs i386_memcpy[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs i386_memset[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};
225 struct processor_costs i386_cost = { /* 386 specific costs */
226 COSTS_N_INSNS (1), /* cost of an add instruction */
227 COSTS_N_INSNS (1), /* cost of a lea instruction */
228 COSTS_N_INSNS (3), /* variable shift costs */
229 COSTS_N_INSNS (2), /* constant shift costs */
230 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
231 COSTS_N_INSNS (6), /* HI */
232 COSTS_N_INSNS (6), /* SI */
233 COSTS_N_INSNS (6), /* DI */
234 COSTS_N_INSNS (6)}, /* other */
235 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
236 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
237 COSTS_N_INSNS (23), /* HI */
238 COSTS_N_INSNS (23), /* SI */
239 COSTS_N_INSNS (23), /* DI */
240 COSTS_N_INSNS (23)}, /* other */
241 COSTS_N_INSNS (3), /* cost of movsx */
242 COSTS_N_INSNS (2), /* cost of movzx */
243 15, /* "large" insn */
245 4, /* cost for loading QImode using movzbl */
246 {2, 4, 2}, /* cost of loading integer registers
247 in QImode, HImode and SImode.
248 Relative to reg-reg move (2). */
249 {2, 4, 2}, /* cost of storing integer registers */
250 2, /* cost of reg,reg fld/fst */
251 {8, 8, 8}, /* cost of loading fp registers
252 in SFmode, DFmode and XFmode */
253 {8, 8, 8}, /* cost of storing fp registers
254 in SFmode, DFmode and XFmode */
255 2, /* cost of moving MMX register */
256 {4, 8}, /* cost of loading MMX registers
257 in SImode and DImode */
258 {4, 8}, /* cost of storing MMX registers
259 in SImode and DImode */
260 2, /* cost of moving SSE register */
261 {4, 8, 16}, /* cost of loading SSE registers
262 in SImode, DImode and TImode */
263 {4, 8, 16}, /* cost of storing SSE registers
264 in SImode, DImode and TImode */
265 3, /* MMX or SSE register to integer */
266 0, /* size of l1 cache */
267 0, /* size of l2 cache */
268 0, /* size of prefetch block */
269 0, /* number of parallel prefetches */
271 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
272 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
273 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
274 COSTS_N_INSNS (22), /* cost of FABS instruction. */
275 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
276 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
279 1, /* scalar_stmt_cost. */
280 1, /* scalar load_cost. */
281 1, /* scalar_store_cost. */
282 1, /* vec_stmt_cost. */
283 1, /* vec_to_scalar_cost. */
284 1, /* scalar_to_vec_cost. */
285 1, /* vec_align_load_cost. */
286 2, /* vec_unalign_load_cost. */
287 1, /* vec_store_cost. */
288 3, /* cond_taken_branch_cost. */
289 1, /* cond_not_taken_branch_cost. */
/* 486 string operations: rep_prefix_4_byte for every block size; the
   second (unused) entry is a dummy.  */
static stringop_algs i486_memcpy[2] = {
  {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs i486_memset[2] = {
  {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};
300 struct processor_costs i486_cost = { /* 486 specific costs */
301 COSTS_N_INSNS (1), /* cost of an add instruction */
302 COSTS_N_INSNS (1), /* cost of a lea instruction */
303 COSTS_N_INSNS (3), /* variable shift costs */
304 COSTS_N_INSNS (2), /* constant shift costs */
305 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
306 COSTS_N_INSNS (12), /* HI */
307 COSTS_N_INSNS (12), /* SI */
308 COSTS_N_INSNS (12), /* DI */
309 COSTS_N_INSNS (12)}, /* other */
310 1, /* cost of multiply per each bit set */
311 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
312 COSTS_N_INSNS (40), /* HI */
313 COSTS_N_INSNS (40), /* SI */
314 COSTS_N_INSNS (40), /* DI */
315 COSTS_N_INSNS (40)}, /* other */
316 COSTS_N_INSNS (3), /* cost of movsx */
317 COSTS_N_INSNS (2), /* cost of movzx */
318 15, /* "large" insn */
320 4, /* cost for loading QImode using movzbl */
321 {2, 4, 2}, /* cost of loading integer registers
322 in QImode, HImode and SImode.
323 Relative to reg-reg move (2). */
324 {2, 4, 2}, /* cost of storing integer registers */
325 2, /* cost of reg,reg fld/fst */
326 {8, 8, 8}, /* cost of loading fp registers
327 in SFmode, DFmode and XFmode */
328 {8, 8, 8}, /* cost of storing fp registers
329 in SFmode, DFmode and XFmode */
330 2, /* cost of moving MMX register */
331 {4, 8}, /* cost of loading MMX registers
332 in SImode and DImode */
333 {4, 8}, /* cost of storing MMX registers
334 in SImode and DImode */
335 2, /* cost of moving SSE register */
336 {4, 8, 16}, /* cost of loading SSE registers
337 in SImode, DImode and TImode */
338 {4, 8, 16}, /* cost of storing SSE registers
339 in SImode, DImode and TImode */
340 3, /* MMX or SSE register to integer */
341 4, /* size of l1 cache. 486 has 8kB cache
342 shared for code and data, so 4kB is
343 not really precise. */
344 4, /* size of l2 cache */
345 0, /* size of prefetch block */
346 0, /* number of parallel prefetches */
348 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
349 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
350 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
351 COSTS_N_INSNS (3), /* cost of FABS instruction. */
352 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
353 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
356 1, /* scalar_stmt_cost. */
357 1, /* scalar load_cost. */
358 1, /* scalar_store_cost. */
359 1, /* vec_stmt_cost. */
360 1, /* vec_to_scalar_cost. */
361 1, /* scalar_to_vec_cost. */
362 1, /* vec_align_load_cost. */
363 2, /* vec_unalign_load_cost. */
364 1, /* vec_store_cost. */
365 3, /* cond_taken_branch_cost. */
366 1, /* cond_not_taken_branch_cost. */
/* Pentium string operations.  memcpy: rep_prefix_4_byte for blocks up
   to 256 bytes, then a library call; memset: rep_prefix_4_byte for all
   sizes.  (Each {max, alg, noalign} triple apparently selects ALG for
   blocks up to MAX bytes, -1 meaning unbounded -- confirm against the
   stringop_algs definition.)  */
static stringop_algs pentium_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs pentium_memset[2] = {
  {libcall, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};
377 struct processor_costs pentium_cost = {
378 COSTS_N_INSNS (1), /* cost of an add instruction */
379 COSTS_N_INSNS (1), /* cost of a lea instruction */
380 COSTS_N_INSNS (4), /* variable shift costs */
381 COSTS_N_INSNS (1), /* constant shift costs */
382 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
383 COSTS_N_INSNS (11), /* HI */
384 COSTS_N_INSNS (11), /* SI */
385 COSTS_N_INSNS (11), /* DI */
386 COSTS_N_INSNS (11)}, /* other */
387 0, /* cost of multiply per each bit set */
388 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
389 COSTS_N_INSNS (25), /* HI */
390 COSTS_N_INSNS (25), /* SI */
391 COSTS_N_INSNS (25), /* DI */
392 COSTS_N_INSNS (25)}, /* other */
393 COSTS_N_INSNS (3), /* cost of movsx */
394 COSTS_N_INSNS (2), /* cost of movzx */
395 8, /* "large" insn */
397 6, /* cost for loading QImode using movzbl */
398 {2, 4, 2}, /* cost of loading integer registers
399 in QImode, HImode and SImode.
400 Relative to reg-reg move (2). */
401 {2, 4, 2}, /* cost of storing integer registers */
402 2, /* cost of reg,reg fld/fst */
403 {2, 2, 6}, /* cost of loading fp registers
404 in SFmode, DFmode and XFmode */
405 {4, 4, 6}, /* cost of storing fp registers
406 in SFmode, DFmode and XFmode */
407 8, /* cost of moving MMX register */
408 {8, 8}, /* cost of loading MMX registers
409 in SImode and DImode */
410 {8, 8}, /* cost of storing MMX registers
411 in SImode and DImode */
412 2, /* cost of moving SSE register */
413 {4, 8, 16}, /* cost of loading SSE registers
414 in SImode, DImode and TImode */
415 {4, 8, 16}, /* cost of storing SSE registers
416 in SImode, DImode and TImode */
417 3, /* MMX or SSE register to integer */
418 8, /* size of l1 cache. */
419 8, /* size of l2 cache */
420 0, /* size of prefetch block */
421 0, /* number of parallel prefetches */
423 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
424 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
425 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
426 COSTS_N_INSNS (1), /* cost of FABS instruction. */
427 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
428 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
431 1, /* scalar_stmt_cost. */
432 1, /* scalar load_cost. */
433 1, /* scalar_store_cost. */
434 1, /* vec_stmt_cost. */
435 1, /* vec_to_scalar_cost. */
436 1, /* scalar_to_vec_cost. */
437 1, /* vec_align_load_cost. */
438 2, /* vec_unalign_load_cost. */
439 1, /* vec_store_cost. */
440 3, /* cond_taken_branch_cost. */
441 1, /* cond_not_taken_branch_cost. */
444 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
445 (we ensure the alignment). For small blocks inline loop is still a
446 noticeable win, for bigger blocks either rep movsl or rep movsb is
447 way to go. Rep movsb has apparently more expensive startup time in CPU,
448 but after 4K the difference is down in the noise. */
/* PentiumPro strategy tables (rationale in the comment above): inline
   loop for small blocks, unrolled loop for medium ones, rep-prefixed
   moves up to 8K, then byte moves (memcpy) or a library call (memset).  */
static stringop_algs pentiumpro_memcpy[2] = {
  {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
                       {8192, rep_prefix_4_byte, false},
                       {-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs pentiumpro_memset[2] = {
  {rep_prefix_4_byte, {{1024, unrolled_loop, false},
                       {8192, rep_prefix_4_byte, false},
                       {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
460 struct processor_costs pentiumpro_cost = {
461 COSTS_N_INSNS (1), /* cost of an add instruction */
462 COSTS_N_INSNS (1), /* cost of a lea instruction */
463 COSTS_N_INSNS (1), /* variable shift costs */
464 COSTS_N_INSNS (1), /* constant shift costs */
465 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
466 COSTS_N_INSNS (4), /* HI */
467 COSTS_N_INSNS (4), /* SI */
468 COSTS_N_INSNS (4), /* DI */
469 COSTS_N_INSNS (4)}, /* other */
470 0, /* cost of multiply per each bit set */
471 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
472 COSTS_N_INSNS (17), /* HI */
473 COSTS_N_INSNS (17), /* SI */
474 COSTS_N_INSNS (17), /* DI */
475 COSTS_N_INSNS (17)}, /* other */
476 COSTS_N_INSNS (1), /* cost of movsx */
477 COSTS_N_INSNS (1), /* cost of movzx */
478 8, /* "large" insn */
480 2, /* cost for loading QImode using movzbl */
481 {4, 4, 4}, /* cost of loading integer registers
482 in QImode, HImode and SImode.
483 Relative to reg-reg move (2). */
484 {2, 2, 2}, /* cost of storing integer registers */
485 2, /* cost of reg,reg fld/fst */
486 {2, 2, 6}, /* cost of loading fp registers
487 in SFmode, DFmode and XFmode */
488 {4, 4, 6}, /* cost of storing fp registers
489 in SFmode, DFmode and XFmode */
490 2, /* cost of moving MMX register */
491 {2, 2}, /* cost of loading MMX registers
492 in SImode and DImode */
493 {2, 2}, /* cost of storing MMX registers
494 in SImode and DImode */
495 2, /* cost of moving SSE register */
496 {2, 2, 8}, /* cost of loading SSE registers
497 in SImode, DImode and TImode */
498 {2, 2, 8}, /* cost of storing SSE registers
499 in SImode, DImode and TImode */
500 3, /* MMX or SSE register to integer */
501 8, /* size of l1 cache. */
502 256, /* size of l2 cache */
503 32, /* size of prefetch block */
504 6, /* number of parallel prefetches */
506 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
507 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
508 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
509 COSTS_N_INSNS (2), /* cost of FABS instruction. */
510 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
511 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
514 1, /* scalar_stmt_cost. */
515 1, /* scalar load_cost. */
516 1, /* scalar_store_cost. */
517 1, /* vec_stmt_cost. */
518 1, /* vec_to_scalar_cost. */
519 1, /* scalar_to_vec_cost. */
520 1, /* vec_align_load_cost. */
521 2, /* vec_unalign_load_cost. */
522 1, /* vec_store_cost. */
523 3, /* cond_taken_branch_cost. */
524 1, /* cond_not_taken_branch_cost. */
/* Geode string operations: rep_prefix_4_byte up to 256 bytes, then a
   library call, for both memcpy and memset.  */
static stringop_algs geode_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs geode_memset[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
534 struct processor_costs geode_cost = {
535 COSTS_N_INSNS (1), /* cost of an add instruction */
536 COSTS_N_INSNS (1), /* cost of a lea instruction */
537 COSTS_N_INSNS (2), /* variable shift costs */
538 COSTS_N_INSNS (1), /* constant shift costs */
539 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
540 COSTS_N_INSNS (4), /* HI */
541 COSTS_N_INSNS (7), /* SI */
542 COSTS_N_INSNS (7), /* DI */
543 COSTS_N_INSNS (7)}, /* other */
544 0, /* cost of multiply per each bit set */
545 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
546 COSTS_N_INSNS (23), /* HI */
547 COSTS_N_INSNS (39), /* SI */
548 COSTS_N_INSNS (39), /* DI */
549 COSTS_N_INSNS (39)}, /* other */
550 COSTS_N_INSNS (1), /* cost of movsx */
551 COSTS_N_INSNS (1), /* cost of movzx */
552 8, /* "large" insn */
554 1, /* cost for loading QImode using movzbl */
555 {1, 1, 1}, /* cost of loading integer registers
556 in QImode, HImode and SImode.
557 Relative to reg-reg move (2). */
558 {1, 1, 1}, /* cost of storing integer registers */
559 1, /* cost of reg,reg fld/fst */
560 {1, 1, 1}, /* cost of loading fp registers
561 in SFmode, DFmode and XFmode */
562 {4, 6, 6}, /* cost of storing fp registers
563 in SFmode, DFmode and XFmode */
565 1, /* cost of moving MMX register */
566 {1, 1}, /* cost of loading MMX registers
567 in SImode and DImode */
568 {1, 1}, /* cost of storing MMX registers
569 in SImode and DImode */
570 1, /* cost of moving SSE register */
571 {1, 1, 1}, /* cost of loading SSE registers
572 in SImode, DImode and TImode */
573 {1, 1, 1}, /* cost of storing SSE registers
574 in SImode, DImode and TImode */
575 1, /* MMX or SSE register to integer */
576 64, /* size of l1 cache. */
577 128, /* size of l2 cache. */
578 32, /* size of prefetch block */
579 1, /* number of parallel prefetches */
581 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
582 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
583 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
584 COSTS_N_INSNS (1), /* cost of FABS instruction. */
585 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
586 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
589 1, /* scalar_stmt_cost. */
590 1, /* scalar load_cost. */
591 1, /* scalar_store_cost. */
592 1, /* vec_stmt_cost. */
593 1, /* vec_to_scalar_cost. */
594 1, /* scalar_to_vec_cost. */
595 1, /* vec_align_load_cost. */
596 2, /* vec_unalign_load_cost. */
597 1, /* vec_store_cost. */
598 3, /* cond_taken_branch_cost. */
599 1, /* cond_not_taken_branch_cost. */
/* K6 string operations: rep_prefix_4_byte up to 256 bytes, then a
   library call, for both memcpy and memset.  */
static stringop_algs k6_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs k6_memset[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
609 struct processor_costs k6_cost = {
610 COSTS_N_INSNS (1), /* cost of an add instruction */
611 COSTS_N_INSNS (2), /* cost of a lea instruction */
612 COSTS_N_INSNS (1), /* variable shift costs */
613 COSTS_N_INSNS (1), /* constant shift costs */
614 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
615 COSTS_N_INSNS (3), /* HI */
616 COSTS_N_INSNS (3), /* SI */
617 COSTS_N_INSNS (3), /* DI */
618 COSTS_N_INSNS (3)}, /* other */
619 0, /* cost of multiply per each bit set */
620 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
621 COSTS_N_INSNS (18), /* HI */
622 COSTS_N_INSNS (18), /* SI */
623 COSTS_N_INSNS (18), /* DI */
624 COSTS_N_INSNS (18)}, /* other */
625 COSTS_N_INSNS (2), /* cost of movsx */
626 COSTS_N_INSNS (2), /* cost of movzx */
627 8, /* "large" insn */
629 3, /* cost for loading QImode using movzbl */
630 {4, 5, 4}, /* cost of loading integer registers
631 in QImode, HImode and SImode.
632 Relative to reg-reg move (2). */
633 {2, 3, 2}, /* cost of storing integer registers */
634 4, /* cost of reg,reg fld/fst */
635 {6, 6, 6}, /* cost of loading fp registers
636 in SFmode, DFmode and XFmode */
637 {4, 4, 4}, /* cost of storing fp registers
638 in SFmode, DFmode and XFmode */
639 2, /* cost of moving MMX register */
640 {2, 2}, /* cost of loading MMX registers
641 in SImode and DImode */
642 {2, 2}, /* cost of storing MMX registers
643 in SImode and DImode */
644 2, /* cost of moving SSE register */
645 {2, 2, 8}, /* cost of loading SSE registers
646 in SImode, DImode and TImode */
647 {2, 2, 8}, /* cost of storing SSE registers
648 in SImode, DImode and TImode */
649 6, /* MMX or SSE register to integer */
650 32, /* size of l1 cache. */
651 32, /* size of l2 cache. Some models
652 have integrated l2 cache, but
653 optimizing for k6 is not important
654 enough to worry about that. */
655 32, /* size of prefetch block */
656 1, /* number of parallel prefetches */
658 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
659 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
660 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
661 COSTS_N_INSNS (2), /* cost of FABS instruction. */
662 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
663 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
666 1, /* scalar_stmt_cost. */
667 1, /* scalar load_cost. */
668 1, /* scalar_store_cost. */
669 1, /* vec_stmt_cost. */
670 1, /* vec_to_scalar_cost. */
671 1, /* scalar_to_vec_cost. */
672 1, /* vec_align_load_cost. */
673 2, /* vec_unalign_load_cost. */
674 1, /* vec_store_cost. */
675 3, /* cond_taken_branch_cost. */
676 1, /* cond_not_taken_branch_cost. */
679 /* For some reason, Athlon deals better with REP prefix (relative to loops)
680 compared to K8. Alignment becomes important after 8 bytes for memcpy and
681 128 bytes for memset. */
/* Athlon string operations (rationale in the comment above):
   rep_prefix_4_byte up to 2K, then a library call, for both memcpy and
   memset.  */
static stringop_algs athlon_memcpy[2] = {
  {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs athlon_memset[2] = {
  {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
689 struct processor_costs athlon_cost = {
690 COSTS_N_INSNS (1), /* cost of an add instruction */
691 COSTS_N_INSNS (2), /* cost of a lea instruction */
692 COSTS_N_INSNS (1), /* variable shift costs */
693 COSTS_N_INSNS (1), /* constant shift costs */
694 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
695 COSTS_N_INSNS (5), /* HI */
696 COSTS_N_INSNS (5), /* SI */
697 COSTS_N_INSNS (5), /* DI */
698 COSTS_N_INSNS (5)}, /* other */
699 0, /* cost of multiply per each bit set */
700 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
701 COSTS_N_INSNS (26), /* HI */
702 COSTS_N_INSNS (42), /* SI */
703 COSTS_N_INSNS (74), /* DI */
704 COSTS_N_INSNS (74)}, /* other */
705 COSTS_N_INSNS (1), /* cost of movsx */
706 COSTS_N_INSNS (1), /* cost of movzx */
707 8, /* "large" insn */
709 4, /* cost for loading QImode using movzbl */
710 {3, 4, 3}, /* cost of loading integer registers
711 in QImode, HImode and SImode.
712 Relative to reg-reg move (2). */
713 {3, 4, 3}, /* cost of storing integer registers */
714 4, /* cost of reg,reg fld/fst */
715 {4, 4, 12}, /* cost of loading fp registers
716 in SFmode, DFmode and XFmode */
717 {6, 6, 8}, /* cost of storing fp registers
718 in SFmode, DFmode and XFmode */
719 2, /* cost of moving MMX register */
720 {4, 4}, /* cost of loading MMX registers
721 in SImode and DImode */
722 {4, 4}, /* cost of storing MMX registers
723 in SImode and DImode */
724 2, /* cost of moving SSE register */
725 {4, 4, 6}, /* cost of loading SSE registers
726 in SImode, DImode and TImode */
727 {4, 4, 5}, /* cost of storing SSE registers
728 in SImode, DImode and TImode */
729 5, /* MMX or SSE register to integer */
730 64, /* size of l1 cache. */
731 256, /* size of l2 cache. */
732 64, /* size of prefetch block */
733 6, /* number of parallel prefetches */
735 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
736 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
737 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
738 COSTS_N_INSNS (2), /* cost of FABS instruction. */
739 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
740 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
743 1, /* scalar_stmt_cost. */
744 1, /* scalar load_cost. */
745 1, /* scalar_store_cost. */
746 1, /* vec_stmt_cost. */
747 1, /* vec_to_scalar_cost. */
748 1, /* scalar_to_vec_cost. */
749 1, /* vec_align_load_cost. */
750 2, /* vec_unalign_load_cost. */
751 1, /* vec_store_cost. */
752 3, /* cond_taken_branch_cost. */
753 1, /* cond_not_taken_branch_cost. */
756 /* K8 has optimized REP instruction for medium sized blocks, but for very
757 small blocks it is better to use loop. For large blocks, libcall can
758 do nontemporary accesses and beat inline considerably. */
/* K8 strategy tables (rationale in the comment above).  Both table
   entries are populated here; the second one uses rep_prefix_8_byte,
   consistent with it being the 64-bit column (confirm against the
   consumers of these tables).  */
static stringop_algs k8_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs k8_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
770 struct processor_costs k8_cost = {
771 COSTS_N_INSNS (1), /* cost of an add instruction */
772 COSTS_N_INSNS (2), /* cost of a lea instruction */
773 COSTS_N_INSNS (1), /* variable shift costs */
774 COSTS_N_INSNS (1), /* constant shift costs */
775 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
776 COSTS_N_INSNS (4), /* HI */
777 COSTS_N_INSNS (3), /* SI */
778 COSTS_N_INSNS (4), /* DI */
779 COSTS_N_INSNS (5)}, /* other */
780 0, /* cost of multiply per each bit set */
781 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
782 COSTS_N_INSNS (26), /* HI */
783 COSTS_N_INSNS (42), /* SI */
784 COSTS_N_INSNS (74), /* DI */
785 COSTS_N_INSNS (74)}, /* other */
786 COSTS_N_INSNS (1), /* cost of movsx */
787 COSTS_N_INSNS (1), /* cost of movzx */
788 8, /* "large" insn */
790 4, /* cost for loading QImode using movzbl */
791 {3, 4, 3}, /* cost of loading integer registers
792 in QImode, HImode and SImode.
793 Relative to reg-reg move (2). */
794 {3, 4, 3}, /* cost of storing integer registers */
795 4, /* cost of reg,reg fld/fst */
796 {4, 4, 12}, /* cost of loading fp registers
797 in SFmode, DFmode and XFmode */
798 {6, 6, 8}, /* cost of storing fp registers
799 in SFmode, DFmode and XFmode */
800 2, /* cost of moving MMX register */
801 {3, 3}, /* cost of loading MMX registers
802 in SImode and DImode */
803 {4, 4}, /* cost of storing MMX registers
804 in SImode and DImode */
805 2, /* cost of moving SSE register */
806 {4, 3, 6}, /* cost of loading SSE registers
807 in SImode, DImode and TImode */
808 {4, 4, 5}, /* cost of storing SSE registers
809 in SImode, DImode and TImode */
810 5, /* MMX or SSE register to integer */
811 64, /* size of l1 cache. */
812 512, /* size of l2 cache. */
813 64, /* size of prefetch block */
814 /* New AMD processors never drop prefetches; if they cannot be performed
815 immediately, they are queued. We set number of simultaneous prefetches
816 to a large constant to reflect this (it probably is not a good idea not
817 to limit number of prefetches at all, as their execution also takes some
819 100, /* number of parallel prefetches */
821 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
822 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
823 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
824 COSTS_N_INSNS (2), /* cost of FABS instruction. */
825 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
826 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
830 4, /* scalar_stmt_cost. */
831 2, /* scalar load_cost. */
832 2, /* scalar_store_cost. */
833 5, /* vec_stmt_cost. */
834 0, /* vec_to_scalar_cost. */
835 2, /* scalar_to_vec_cost. */
836 2, /* vec_align_load_cost. */
837 3, /* vec_unalign_load_cost. */
838 3, /* vec_store_cost. */
839 3, /* cond_taken_branch_cost. */
840 2, /* cond_not_taken_branch_cost. */
843 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
844 very small blocks it is better to use loop. For large blocks, libcall can
845 do nontemporary accesses and beat inline considerably. */
/* AMDFAM10 strategy tables (rationale in the comment above); identical
   thresholds to the K8 tables: tiny blocks use a loop, medium blocks
   rep-prefixed moves, large blocks a library call.  */
static stringop_algs amdfam10_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs amdfam10_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
856 struct processor_costs amdfam10_cost = {
857 COSTS_N_INSNS (1), /* cost of an add instruction */
858 COSTS_N_INSNS (2), /* cost of a lea instruction */
859 COSTS_N_INSNS (1), /* variable shift costs */
860 COSTS_N_INSNS (1), /* constant shift costs */
861 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
862 COSTS_N_INSNS (4), /* HI */
863 COSTS_N_INSNS (3), /* SI */
864 COSTS_N_INSNS (4), /* DI */
865 COSTS_N_INSNS (5)}, /* other */
866 0, /* cost of multiply per each bit set */
867 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
868 COSTS_N_INSNS (35), /* HI */
869 COSTS_N_INSNS (51), /* SI */
870 COSTS_N_INSNS (83), /* DI */
871 COSTS_N_INSNS (83)}, /* other */
872 COSTS_N_INSNS (1), /* cost of movsx */
873 COSTS_N_INSNS (1), /* cost of movzx */
874 8, /* "large" insn */
876 4, /* cost for loading QImode using movzbl */
877 {3, 4, 3}, /* cost of loading integer registers
878 in QImode, HImode and SImode.
879 Relative to reg-reg move (2). */
880 {3, 4, 3}, /* cost of storing integer registers */
881 4, /* cost of reg,reg fld/fst */
882 {4, 4, 12}, /* cost of loading fp registers
883 in SFmode, DFmode and XFmode */
884 {6, 6, 8}, /* cost of storing fp registers
885 in SFmode, DFmode and XFmode */
886 2, /* cost of moving MMX register */
887 {3, 3}, /* cost of loading MMX registers
888 in SImode and DImode */
889 {4, 4}, /* cost of storing MMX registers
890 in SImode and DImode */
891 2, /* cost of moving SSE register */
892 {4, 4, 3}, /* cost of loading SSE registers
893 in SImode, DImode and TImode */
894 {4, 4, 5}, /* cost of storing SSE registers
895 in SImode, DImode and TImode */
896 3, /* MMX or SSE register to integer */
  /* Approximate GPR<->XMM move latencies -- this table was comment text
     in the original source; the comment delimiters were lost in
     extraction and are restored here:
       MOVD reg64, xmmreg  Double  FSTORE 4
       MOVD reg32, xmmreg  Double  FSTORE 4
       MOVD reg64, xmmreg  Double  FADD 3
       MOVD reg32, xmmreg  Double  FADD 3  */
905 64, /* size of l1 cache. */
906 512, /* size of l2 cache. */
907 64, /* size of prefetch block */
908 /* New AMD processors never drop prefetches; if they cannot be performed
909 immediately, they are queued. We set number of simultaneous prefetches
910 to a large constant to reflect this (it probably is not a good idea not
911 to limit number of prefetches at all, as their execution also takes some
913 100, /* number of parallel prefetches */
915 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
916 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
917 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
918 COSTS_N_INSNS (2), /* cost of FABS instruction. */
919 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
920 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
924 4, /* scalar_stmt_cost. */
925 2, /* scalar load_cost. */
926 2, /* scalar_store_cost. */
927 6, /* vec_stmt_cost. */
928 0, /* vec_to_scalar_cost. */
929 2, /* scalar_to_vec_cost. */
930 2, /* vec_align_load_cost. */
931 2, /* vec_unalign_load_cost. */
932 2, /* vec_store_cost. */
933 2, /* cond_taken_branch_cost. */
934 1, /* cond_not_taken_branch_cost. */
937 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
938 very small blocks it is better to use loop. For large blocks, libcall
939 can do nontemporary accesses and beat inline considerably. */
/* Strategy tables for inline memcpy expansion on bdver1.  NOTE(review):
   element [0] appears to be the 32-bit variant and [1] the 64-bit one
   (rep_prefix_8_byte only appears in [1]) -- confirm against the users of
   stringop_algs.  Each {max, alg, noalign} entry selects ALG for block
   sizes up to MAX bytes; MAX == -1 marks the final fallback.  */
940 static stringop_algs bdver1_memcpy[2] = {
941 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
942 {-1, rep_prefix_4_byte, false}}},
943 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
944 {-1, libcall, false}}}};
/* Likewise for inline memset expansion.  */
945 static stringop_algs bdver1_memset[2] = {
946 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
947 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
948 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
949 {-1, libcall, false}}}};
951 const struct processor_costs bdver1_cost = {
952 COSTS_N_INSNS (1), /* cost of an add instruction */
953 COSTS_N_INSNS (1), /* cost of a lea instruction */
954 COSTS_N_INSNS (1), /* variable shift costs */
955 COSTS_N_INSNS (1), /* constant shift costs */
956 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
957 COSTS_N_INSNS (4), /* HI */
958 COSTS_N_INSNS (4), /* SI */
959 COSTS_N_INSNS (6), /* DI */
960 COSTS_N_INSNS (6)}, /* other */
961 0, /* cost of multiply per each bit set */
962 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
963 COSTS_N_INSNS (35), /* HI */
964 COSTS_N_INSNS (51), /* SI */
965 COSTS_N_INSNS (83), /* DI */
966 COSTS_N_INSNS (83)}, /* other */
967 COSTS_N_INSNS (1), /* cost of movsx */
968 COSTS_N_INSNS (1), /* cost of movzx */
969 8, /* "large" insn */
971 4, /* cost for loading QImode using movzbl */
972 {5, 5, 4}, /* cost of loading integer registers
973 in QImode, HImode and SImode.
974 Relative to reg-reg move (2). */
975 {4, 4, 4}, /* cost of storing integer registers */
976 2, /* cost of reg,reg fld/fst */
977 {5, 5, 12}, /* cost of loading fp registers
978 in SFmode, DFmode and XFmode */
979 {4, 4, 8}, /* cost of storing fp registers
980 in SFmode, DFmode and XFmode */
981 2, /* cost of moving MMX register */
982 {4, 4}, /* cost of loading MMX registers
983 in SImode and DImode */
984 {4, 4}, /* cost of storing MMX registers
985 in SImode and DImode */
986 2, /* cost of moving SSE register */
987 {4, 4, 4}, /* cost of loading SSE registers
988 in SImode, DImode and TImode */
989 {4, 4, 4}, /* cost of storing SSE registers
990 in SImode, DImode and TImode */
991 2, /* MMX or SSE register to integer */
993 MOVD reg64, xmmreg Double FSTORE 4
994 MOVD reg32, xmmreg Double FSTORE 4
996 MOVD reg64, xmmreg Double FADD 3
998 MOVD reg32, xmmreg Double FADD 3
1000 16, /* size of l1 cache. */
1001 2048, /* size of l2 cache. */
1002 64, /* size of prefetch block */
1003 /* New AMD processors never drop prefetches; if they cannot be performed
1004 immediately, they are queued. We set number of simultaneous prefetches
1005 to a large constant to reflect this (it probably is not a good idea not
1006 to limit number of prefetches at all, as their execution also takes some
1008 100, /* number of parallel prefetches */
1009 2, /* Branch cost */
1010 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1011 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1012 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1013 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1014 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1015 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1019 6, /* scalar_stmt_cost. */
1020 4, /* scalar load_cost. */
1021 4, /* scalar_store_cost. */
1022 6, /* vec_stmt_cost. */
1023 0, /* vec_to_scalar_cost. */
1024 2, /* scalar_to_vec_cost. */
1025 4, /* vec_align_load_cost. */
1026 4, /* vec_unalign_load_cost. */
1027 4, /* vec_store_cost. */
1028 2, /* cond_taken_branch_cost. */
1029 1, /* cond_not_taken_branch_cost. */
1032 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
1033 very small blocks it is better to use loop. For large blocks, libcall
1034 can do nontemporary accesses and beat inline considerably. */
/* Strategy tables for inline memcpy expansion on bdver2.  NOTE(review):
   element [0] appears to be the 32-bit variant and [1] the 64-bit one
   (rep_prefix_8_byte only appears in [1]) -- confirm against the users of
   stringop_algs.  Each {max, alg, noalign} entry selects ALG for block
   sizes up to MAX bytes; MAX == -1 marks the final fallback.  */
1036 static stringop_algs bdver2_memcpy[2] = {
1037 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1038 {-1, rep_prefix_4_byte, false}}},
1039 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1040 {-1, libcall, false}}}};
/* Likewise for inline memset expansion.  */
1041 static stringop_algs bdver2_memset[2] = {
1042 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1043 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1044 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1045 {-1, libcall, false}}}};
1047 const struct processor_costs bdver2_cost = {
1048 COSTS_N_INSNS (1), /* cost of an add instruction */
1049 COSTS_N_INSNS (1), /* cost of a lea instruction */
1050 COSTS_N_INSNS (1), /* variable shift costs */
1051 COSTS_N_INSNS (1), /* constant shift costs */
1052 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1053 COSTS_N_INSNS (4), /* HI */
1054 COSTS_N_INSNS (4), /* SI */
1055 COSTS_N_INSNS (6), /* DI */
1056 COSTS_N_INSNS (6)}, /* other */
1057 0, /* cost of multiply per each bit set */
1058 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1059 COSTS_N_INSNS (35), /* HI */
1060 COSTS_N_INSNS (51), /* SI */
1061 COSTS_N_INSNS (83), /* DI */
1062 COSTS_N_INSNS (83)}, /* other */
1063 COSTS_N_INSNS (1), /* cost of movsx */
1064 COSTS_N_INSNS (1), /* cost of movzx */
1065 8, /* "large" insn */
1067 4, /* cost for loading QImode using movzbl */
1068 {5, 5, 4}, /* cost of loading integer registers
1069 in QImode, HImode and SImode.
1070 Relative to reg-reg move (2). */
1071 {4, 4, 4}, /* cost of storing integer registers */
1072 2, /* cost of reg,reg fld/fst */
1073 {5, 5, 12}, /* cost of loading fp registers
1074 in SFmode, DFmode and XFmode */
1075 {4, 4, 8}, /* cost of storing fp registers
1076 in SFmode, DFmode and XFmode */
1077 2, /* cost of moving MMX register */
1078 {4, 4}, /* cost of loading MMX registers
1079 in SImode and DImode */
1080 {4, 4}, /* cost of storing MMX registers
1081 in SImode and DImode */
1082 2, /* cost of moving SSE register */
1083 {4, 4, 4}, /* cost of loading SSE registers
1084 in SImode, DImode and TImode */
1085 {4, 4, 4}, /* cost of storing SSE registers
1086 in SImode, DImode and TImode */
1087 2, /* MMX or SSE register to integer */
1089 MOVD reg64, xmmreg Double FSTORE 4
1090 MOVD reg32, xmmreg Double FSTORE 4
1092 MOVD reg64, xmmreg Double FADD 3
1094 MOVD reg32, xmmreg Double FADD 3
1096 16, /* size of l1 cache. */
1097 2048, /* size of l2 cache. */
1098 64, /* size of prefetch block */
1099 /* New AMD processors never drop prefetches; if they cannot be performed
1100 immediately, they are queued. We set number of simultaneous prefetches
1101 to a large constant to reflect this (it probably is not a good idea not
1102 to limit number of prefetches at all, as their execution also takes some
1104 100, /* number of parallel prefetches */
1105 2, /* Branch cost */
1106 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1107 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1108 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1109 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1110 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1111 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1115 6, /* scalar_stmt_cost. */
1116 4, /* scalar load_cost. */
1117 4, /* scalar_store_cost. */
1118 6, /* vec_stmt_cost. */
1119 0, /* vec_to_scalar_cost. */
1120 2, /* scalar_to_vec_cost. */
1121 4, /* vec_align_load_cost. */
1122 4, /* vec_unalign_load_cost. */
1123 4, /* vec_store_cost. */
1124 2, /* cond_taken_branch_cost. */
1125 1, /* cond_not_taken_branch_cost. */
1129 /* BDVER3 has optimized REP instruction for medium sized blocks, but for
1130 very small blocks it is better to use loop. For large blocks, libcall
1131 can do nontemporary accesses and beat inline considerably. */
/* Strategy tables for inline memcpy expansion on bdver3.  NOTE(review):
   element [0] appears to be the 32-bit variant and [1] the 64-bit one
   (rep_prefix_8_byte only appears in [1]) -- confirm against the users of
   stringop_algs.  Each {max, alg, noalign} entry selects ALG for block
   sizes up to MAX bytes; MAX == -1 marks the final fallback.  */
1132 static stringop_algs bdver3_memcpy[2] = {
1133 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1134 {-1, rep_prefix_4_byte, false}}},
1135 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1136 {-1, libcall, false}}}};
/* Likewise for inline memset expansion.  */
1137 static stringop_algs bdver3_memset[2] = {
1138 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1139 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1140 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1141 {-1, libcall, false}}}};
1142 struct processor_costs bdver3_cost = {
1143 COSTS_N_INSNS (1), /* cost of an add instruction */
1144 COSTS_N_INSNS (1), /* cost of a lea instruction */
1145 COSTS_N_INSNS (1), /* variable shift costs */
1146 COSTS_N_INSNS (1), /* constant shift costs */
1147 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1148 COSTS_N_INSNS (4), /* HI */
1149 COSTS_N_INSNS (4), /* SI */
1150 COSTS_N_INSNS (6), /* DI */
1151 COSTS_N_INSNS (6)}, /* other */
1152 0, /* cost of multiply per each bit set */
1153 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1154 COSTS_N_INSNS (35), /* HI */
1155 COSTS_N_INSNS (51), /* SI */
1156 COSTS_N_INSNS (83), /* DI */
1157 COSTS_N_INSNS (83)}, /* other */
1158 COSTS_N_INSNS (1), /* cost of movsx */
1159 COSTS_N_INSNS (1), /* cost of movzx */
1160 8, /* "large" insn */
1162 4, /* cost for loading QImode using movzbl */
1163 {5, 5, 4}, /* cost of loading integer registers
1164 in QImode, HImode and SImode.
1165 Relative to reg-reg move (2). */
1166 {4, 4, 4}, /* cost of storing integer registers */
1167 2, /* cost of reg,reg fld/fst */
1168 {5, 5, 12}, /* cost of loading fp registers
1169 in SFmode, DFmode and XFmode */
1170 {4, 4, 8}, /* cost of storing fp registers
1171 in SFmode, DFmode and XFmode */
1172 2, /* cost of moving MMX register */
1173 {4, 4}, /* cost of loading MMX registers
1174 in SImode and DImode */
1175 {4, 4}, /* cost of storing MMX registers
1176 in SImode and DImode */
1177 2, /* cost of moving SSE register */
1178 {4, 4, 4}, /* cost of loading SSE registers
1179 in SImode, DImode and TImode */
1180 {4, 4, 4}, /* cost of storing SSE registers
1181 in SImode, DImode and TImode */
1182 2, /* MMX or SSE register to integer */
1183 16, /* size of l1 cache. */
1184 2048, /* size of l2 cache. */
1185 64, /* size of prefetch block */
1186 /* New AMD processors never drop prefetches; if they cannot be performed
1187 immediately, they are queued. We set number of simultaneous prefetches
1188 to a large constant to reflect this (it probably is not a good idea not
1189 to limit number of prefetches at all, as their execution also takes some
1191 100, /* number of parallel prefetches */
1192 2, /* Branch cost */
1193 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1194 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1195 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1196 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1197 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1198 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1202 6, /* scalar_stmt_cost. */
1203 4, /* scalar load_cost. */
1204 4, /* scalar_store_cost. */
1205 6, /* vec_stmt_cost. */
1206 0, /* vec_to_scalar_cost. */
1207 2, /* scalar_to_vec_cost. */
1208 4, /* vec_align_load_cost. */
1209 4, /* vec_unalign_load_cost. */
1210 4, /* vec_store_cost. */
1211 2, /* cond_taken_branch_cost. */
1212 1, /* cond_not_taken_branch_cost. */
1215 /* BDVER4 has optimized REP instruction for medium sized blocks, but for
1216 very small blocks it is better to use loop. For large blocks, libcall
1217 can do nontemporary accesses and beat inline considerably. */
/* Strategy tables for inline memcpy expansion on bdver4.  NOTE(review):
   element [0] appears to be the 32-bit variant and [1] the 64-bit one
   (rep_prefix_8_byte only appears in [1]) -- confirm against the users of
   stringop_algs.  Each {max, alg, noalign} entry selects ALG for block
   sizes up to MAX bytes; MAX == -1 marks the final fallback.  */
1218 static stringop_algs bdver4_memcpy[2] = {
1219 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1220 {-1, rep_prefix_4_byte, false}}},
1221 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1222 {-1, libcall, false}}}};
/* Likewise for inline memset expansion.  */
1223 static stringop_algs bdver4_memset[2] = {
1224 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1225 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1226 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1227 {-1, libcall, false}}}};
1228 struct processor_costs bdver4_cost = {
1229 COSTS_N_INSNS (1), /* cost of an add instruction */
1230 COSTS_N_INSNS (1), /* cost of a lea instruction */
1231 COSTS_N_INSNS (1), /* variable shift costs */
1232 COSTS_N_INSNS (1), /* constant shift costs */
1233 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1234 COSTS_N_INSNS (4), /* HI */
1235 COSTS_N_INSNS (4), /* SI */
1236 COSTS_N_INSNS (6), /* DI */
1237 COSTS_N_INSNS (6)}, /* other */
1238 0, /* cost of multiply per each bit set */
1239 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1240 COSTS_N_INSNS (35), /* HI */
1241 COSTS_N_INSNS (51), /* SI */
1242 COSTS_N_INSNS (83), /* DI */
1243 COSTS_N_INSNS (83)}, /* other */
1244 COSTS_N_INSNS (1), /* cost of movsx */
1245 COSTS_N_INSNS (1), /* cost of movzx */
1246 8, /* "large" insn */
1248 4, /* cost for loading QImode using movzbl */
1249 {5, 5, 4}, /* cost of loading integer registers
1250 in QImode, HImode and SImode.
1251 Relative to reg-reg move (2). */
1252 {4, 4, 4}, /* cost of storing integer registers */
1253 2, /* cost of reg,reg fld/fst */
1254 {5, 5, 12}, /* cost of loading fp registers
1255 in SFmode, DFmode and XFmode */
1256 {4, 4, 8}, /* cost of storing fp registers
1257 in SFmode, DFmode and XFmode */
1258 2, /* cost of moving MMX register */
1259 {4, 4}, /* cost of loading MMX registers
1260 in SImode and DImode */
1261 {4, 4}, /* cost of storing MMX registers
1262 in SImode and DImode */
1263 2, /* cost of moving SSE register */
1264 {4, 4, 4}, /* cost of loading SSE registers
1265 in SImode, DImode and TImode */
1266 {4, 4, 4}, /* cost of storing SSE registers
1267 in SImode, DImode and TImode */
1268 2, /* MMX or SSE register to integer */
1269 16, /* size of l1 cache. */
1270 2048, /* size of l2 cache. */
1271 64, /* size of prefetch block */
1272 /* New AMD processors never drop prefetches; if they cannot be performed
1273 immediately, they are queued. We set number of simultaneous prefetches
1274 to a large constant to reflect this (it probably is not a good idea not
1275 to limit number of prefetches at all, as their execution also takes some
1277 100, /* number of parallel prefetches */
1278 2, /* Branch cost */
1279 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1280 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1281 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1282 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1283 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1284 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1288 6, /* scalar_stmt_cost. */
1289 4, /* scalar load_cost. */
1290 4, /* scalar_store_cost. */
1291 6, /* vec_stmt_cost. */
1292 0, /* vec_to_scalar_cost. */
1293 2, /* scalar_to_vec_cost. */
1294 4, /* vec_align_load_cost. */
1295 4, /* vec_unalign_load_cost. */
1296 4, /* vec_store_cost. */
1297 2, /* cond_taken_branch_cost. */
1298 1, /* cond_not_taken_branch_cost. */
1301 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1302 very small blocks it is better to use loop. For large blocks, libcall can
1303 do nontemporary accesses and beat inline considerably. */
/* Strategy tables for inline memcpy expansion on btver1.  NOTE(review):
   element [0] appears to be the 32-bit variant and [1] the 64-bit one
   (rep_prefix_8_byte only appears in [1]) -- confirm against the users of
   stringop_algs.  Each {max, alg, noalign} entry selects ALG for block
   sizes up to MAX bytes; MAX == -1 marks the final fallback.  */
1304 static stringop_algs btver1_memcpy[2] = {
1305 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1306 {-1, rep_prefix_4_byte, false}}},
1307 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1308 {-1, libcall, false}}}};
/* Likewise for inline memset expansion.  */
1309 static stringop_algs btver1_memset[2] = {
1310 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1311 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1312 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1313 {-1, libcall, false}}}};
1314 const struct processor_costs btver1_cost = {
1315 COSTS_N_INSNS (1), /* cost of an add instruction */
1316 COSTS_N_INSNS (2), /* cost of a lea instruction */
1317 COSTS_N_INSNS (1), /* variable shift costs */
1318 COSTS_N_INSNS (1), /* constant shift costs */
1319 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1320 COSTS_N_INSNS (4), /* HI */
1321 COSTS_N_INSNS (3), /* SI */
1322 COSTS_N_INSNS (4), /* DI */
1323 COSTS_N_INSNS (5)}, /* other */
1324 0, /* cost of multiply per each bit set */
1325 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1326 COSTS_N_INSNS (35), /* HI */
1327 COSTS_N_INSNS (51), /* SI */
1328 COSTS_N_INSNS (83), /* DI */
1329 COSTS_N_INSNS (83)}, /* other */
1330 COSTS_N_INSNS (1), /* cost of movsx */
1331 COSTS_N_INSNS (1), /* cost of movzx */
1332 8, /* "large" insn */
1334 4, /* cost for loading QImode using movzbl */
1335 {3, 4, 3}, /* cost of loading integer registers
1336 in QImode, HImode and SImode.
1337 Relative to reg-reg move (2). */
1338 {3, 4, 3}, /* cost of storing integer registers */
1339 4, /* cost of reg,reg fld/fst */
1340 {4, 4, 12}, /* cost of loading fp registers
1341 in SFmode, DFmode and XFmode */
1342 {6, 6, 8}, /* cost of storing fp registers
1343 in SFmode, DFmode and XFmode */
1344 2, /* cost of moving MMX register */
1345 {3, 3}, /* cost of loading MMX registers
1346 in SImode and DImode */
1347 {4, 4}, /* cost of storing MMX registers
1348 in SImode and DImode */
1349 2, /* cost of moving SSE register */
1350 {4, 4, 3}, /* cost of loading SSE registers
1351 in SImode, DImode and TImode */
1352 {4, 4, 5}, /* cost of storing SSE registers
1353 in SImode, DImode and TImode */
1354 3, /* MMX or SSE register to integer */
1356 MOVD reg64, xmmreg Double FSTORE 4
1357 MOVD reg32, xmmreg Double FSTORE 4
1359 MOVD reg64, xmmreg Double FADD 3
1361 MOVD reg32, xmmreg Double FADD 3
1363 32, /* size of l1 cache. */
1364 512, /* size of l2 cache. */
1365 64, /* size of prefetch block */
1366 100, /* number of parallel prefetches */
1367 2, /* Branch cost */
1368 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1369 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1370 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1371 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1372 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1373 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1377 4, /* scalar_stmt_cost. */
1378 2, /* scalar load_cost. */
1379 2, /* scalar_store_cost. */
1380 6, /* vec_stmt_cost. */
1381 0, /* vec_to_scalar_cost. */
1382 2, /* scalar_to_vec_cost. */
1383 2, /* vec_align_load_cost. */
1384 2, /* vec_unalign_load_cost. */
1385 2, /* vec_store_cost. */
1386 2, /* cond_taken_branch_cost. */
1387 1, /* cond_not_taken_branch_cost. */
/* Strategy tables for inline memcpy expansion on btver2.  NOTE(review):
   element [0] appears to be the 32-bit variant and [1] the 64-bit one
   (rep_prefix_8_byte only appears in [1]) -- confirm against the users of
   stringop_algs.  Each {max, alg, noalign} entry selects ALG for block
   sizes up to MAX bytes; MAX == -1 marks the final fallback.  */
1390 static stringop_algs btver2_memcpy[2] = {
1391 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1392 {-1, rep_prefix_4_byte, false}}},
1393 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1394 {-1, libcall, false}}}};
/* Likewise for inline memset expansion.  */
1395 static stringop_algs btver2_memset[2] = {
1396 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1397 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1398 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1399 {-1, libcall, false}}}};
1400 const struct processor_costs btver2_cost = {
1401 COSTS_N_INSNS (1), /* cost of an add instruction */
1402 COSTS_N_INSNS (2), /* cost of a lea instruction */
1403 COSTS_N_INSNS (1), /* variable shift costs */
1404 COSTS_N_INSNS (1), /* constant shift costs */
1405 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1406 COSTS_N_INSNS (4), /* HI */
1407 COSTS_N_INSNS (3), /* SI */
1408 COSTS_N_INSNS (4), /* DI */
1409 COSTS_N_INSNS (5)}, /* other */
1410 0, /* cost of multiply per each bit set */
1411 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1412 COSTS_N_INSNS (35), /* HI */
1413 COSTS_N_INSNS (51), /* SI */
1414 COSTS_N_INSNS (83), /* DI */
1415 COSTS_N_INSNS (83)}, /* other */
1416 COSTS_N_INSNS (1), /* cost of movsx */
1417 COSTS_N_INSNS (1), /* cost of movzx */
1418 8, /* "large" insn */
1420 4, /* cost for loading QImode using movzbl */
1421 {3, 4, 3}, /* cost of loading integer registers
1422 in QImode, HImode and SImode.
1423 Relative to reg-reg move (2). */
1424 {3, 4, 3}, /* cost of storing integer registers */
1425 4, /* cost of reg,reg fld/fst */
1426 {4, 4, 12}, /* cost of loading fp registers
1427 in SFmode, DFmode and XFmode */
1428 {6, 6, 8}, /* cost of storing fp registers
1429 in SFmode, DFmode and XFmode */
1430 2, /* cost of moving MMX register */
1431 {3, 3}, /* cost of loading MMX registers
1432 in SImode and DImode */
1433 {4, 4}, /* cost of storing MMX registers
1434 in SImode and DImode */
1435 2, /* cost of moving SSE register */
1436 {4, 4, 3}, /* cost of loading SSE registers
1437 in SImode, DImode and TImode */
1438 {4, 4, 5}, /* cost of storing SSE registers
1439 in SImode, DImode and TImode */
1440 3, /* MMX or SSE register to integer */
1442 MOVD reg64, xmmreg Double FSTORE 4
1443 MOVD reg32, xmmreg Double FSTORE 4
1445 MOVD reg64, xmmreg Double FADD 3
1447 MOVD reg32, xmmreg Double FADD 3
1449 32, /* size of l1 cache. */
1450 2048, /* size of l2 cache. */
1451 64, /* size of prefetch block */
1452 100, /* number of parallel prefetches */
1453 2, /* Branch cost */
1454 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1455 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1456 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1457 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1458 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1459 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1462 4, /* scalar_stmt_cost. */
1463 2, /* scalar load_cost. */
1464 2, /* scalar_store_cost. */
1465 6, /* vec_stmt_cost. */
1466 0, /* vec_to_scalar_cost. */
1467 2, /* scalar_to_vec_cost. */
1468 2, /* vec_align_load_cost. */
1469 2, /* vec_unalign_load_cost. */
1470 2, /* vec_store_cost. */
1471 2, /* cond_taken_branch_cost. */
1472 1, /* cond_not_taken_branch_cost. */
/* Strategy tables for inline memcpy expansion on Pentium 4.  Each
   {max, alg, noalign} entry selects ALG for block sizes up to MAX bytes;
   MAX == -1 marks the final fallback.  NOTE(review): the second element is
   DUMMY_STRINGOP_ALGS -- presumably an unused placeholder slot for this
   tuning; confirm against the stringop_algs consumers.  */
1475 static stringop_algs pentium4_memcpy[2] = {
1476 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1477 DUMMY_STRINGOP_ALGS};
/* Likewise for inline memset expansion.  */
1478 static stringop_algs pentium4_memset[2] = {
1479 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1480 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1481 DUMMY_STRINGOP_ALGS};
1484 struct processor_costs pentium4_cost = {
1485 COSTS_N_INSNS (1), /* cost of an add instruction */
1486 COSTS_N_INSNS (3), /* cost of a lea instruction */
1487 COSTS_N_INSNS (4), /* variable shift costs */
1488 COSTS_N_INSNS (4), /* constant shift costs */
1489 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1490 COSTS_N_INSNS (15), /* HI */
1491 COSTS_N_INSNS (15), /* SI */
1492 COSTS_N_INSNS (15), /* DI */
1493 COSTS_N_INSNS (15)}, /* other */
1494 0, /* cost of multiply per each bit set */
1495 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1496 COSTS_N_INSNS (56), /* HI */
1497 COSTS_N_INSNS (56), /* SI */
1498 COSTS_N_INSNS (56), /* DI */
1499 COSTS_N_INSNS (56)}, /* other */
1500 COSTS_N_INSNS (1), /* cost of movsx */
1501 COSTS_N_INSNS (1), /* cost of movzx */
1502 16, /* "large" insn */
1504 2, /* cost for loading QImode using movzbl */
1505 {4, 5, 4}, /* cost of loading integer registers
1506 in QImode, HImode and SImode.
1507 Relative to reg-reg move (2). */
1508 {2, 3, 2}, /* cost of storing integer registers */
1509 2, /* cost of reg,reg fld/fst */
1510 {2, 2, 6}, /* cost of loading fp registers
1511 in SFmode, DFmode and XFmode */
1512 {4, 4, 6}, /* cost of storing fp registers
1513 in SFmode, DFmode and XFmode */
1514 2, /* cost of moving MMX register */
1515 {2, 2}, /* cost of loading MMX registers
1516 in SImode and DImode */
1517 {2, 2}, /* cost of storing MMX registers
1518 in SImode and DImode */
1519 12, /* cost of moving SSE register */
1520 {12, 12, 12}, /* cost of loading SSE registers
1521 in SImode, DImode and TImode */
1522 {2, 2, 8}, /* cost of storing SSE registers
1523 in SImode, DImode and TImode */
1524 10, /* MMX or SSE register to integer */
1525 8, /* size of l1 cache. */
1526 256, /* size of l2 cache. */
1527 64, /* size of prefetch block */
1528 6, /* number of parallel prefetches */
1529 2, /* Branch cost */
1530 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1531 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1532 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1533 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1534 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1535 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1538 1, /* scalar_stmt_cost. */
1539 1, /* scalar load_cost. */
1540 1, /* scalar_store_cost. */
1541 1, /* vec_stmt_cost. */
1542 1, /* vec_to_scalar_cost. */
1543 1, /* scalar_to_vec_cost. */
1544 1, /* vec_align_load_cost. */
1545 2, /* vec_unalign_load_cost. */
1546 1, /* vec_store_cost. */
1547 3, /* cond_taken_branch_cost. */
1548 1, /* cond_not_taken_branch_cost. */
/* Strategy tables for inline memcpy expansion on Nocona.  NOTE(review):
   element [0] appears to be the 32-bit variant and [1] the 64-bit one
   (rep_prefix_8_byte only appears in [1]) -- confirm against the users of
   stringop_algs.  Each {max, alg, noalign} entry selects ALG for block
   sizes up to MAX bytes; MAX == -1 marks the final fallback.  */
1551 static stringop_algs nocona_memcpy[2] = {
1552 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1553 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1554 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
/* Likewise for inline memset expansion.  */
1556 static stringop_algs nocona_memset[2] = {
1557 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1558 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1559 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1560 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1563 struct processor_costs nocona_cost = {
1564 COSTS_N_INSNS (1), /* cost of an add instruction */
1565 COSTS_N_INSNS (1), /* cost of a lea instruction */
1566 COSTS_N_INSNS (1), /* variable shift costs */
1567 COSTS_N_INSNS (1), /* constant shift costs */
1568 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1569 COSTS_N_INSNS (10), /* HI */
1570 COSTS_N_INSNS (10), /* SI */
1571 COSTS_N_INSNS (10), /* DI */
1572 COSTS_N_INSNS (10)}, /* other */
1573 0, /* cost of multiply per each bit set */
1574 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1575 COSTS_N_INSNS (66), /* HI */
1576 COSTS_N_INSNS (66), /* SI */
1577 COSTS_N_INSNS (66), /* DI */
1578 COSTS_N_INSNS (66)}, /* other */
1579 COSTS_N_INSNS (1), /* cost of movsx */
1580 COSTS_N_INSNS (1), /* cost of movzx */
1581 16, /* "large" insn */
1582 17, /* MOVE_RATIO */
1583 4, /* cost for loading QImode using movzbl */
1584 {4, 4, 4}, /* cost of loading integer registers
1585 in QImode, HImode and SImode.
1586 Relative to reg-reg move (2). */
1587 {4, 4, 4}, /* cost of storing integer registers */
1588 3, /* cost of reg,reg fld/fst */
1589 {12, 12, 12}, /* cost of loading fp registers
1590 in SFmode, DFmode and XFmode */
1591 {4, 4, 4}, /* cost of storing fp registers
1592 in SFmode, DFmode and XFmode */
1593 6, /* cost of moving MMX register */
1594 {12, 12}, /* cost of loading MMX registers
1595 in SImode and DImode */
1596 {12, 12}, /* cost of storing MMX registers
1597 in SImode and DImode */
1598 6, /* cost of moving SSE register */
1599 {12, 12, 12}, /* cost of loading SSE registers
1600 in SImode, DImode and TImode */
1601 {12, 12, 12}, /* cost of storing SSE registers
1602 in SImode, DImode and TImode */
1603 8, /* MMX or SSE register to integer */
1604 8, /* size of l1 cache. */
1605 1024, /* size of l2 cache. */
1606 64, /* size of prefetch block */
1607 8, /* number of parallel prefetches */
1608 1, /* Branch cost */
1609 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1610 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1611 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1612 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1613 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1614 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1617 1, /* scalar_stmt_cost. */
1618 1, /* scalar load_cost. */
1619 1, /* scalar_store_cost. */
1620 1, /* vec_stmt_cost. */
1621 1, /* vec_to_scalar_cost. */
1622 1, /* scalar_to_vec_cost. */
1623 1, /* vec_align_load_cost. */
1624 2, /* vec_unalign_load_cost. */
1625 1, /* vec_store_cost. */
1626 3, /* cond_taken_branch_cost. */
1627 1, /* cond_not_taken_branch_cost. */
/* Strategy tables for inline memcpy expansion on Atom.  NOTE(review):
   element [0] appears to be the 32-bit variant and [1] the 64-bit one
   (rep_prefix_8_byte only appears in [1]) -- confirm against the users of
   stringop_algs.  Each {max, alg, noalign} entry selects ALG for block
   sizes up to MAX bytes; MAX == -1 marks the final fallback.  */
1630 static stringop_algs atom_memcpy[2] = {
1631 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1632 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1633 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Likewise for inline memset expansion.  */
1634 static stringop_algs atom_memset[2] = {
1635 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1636 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1637 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1638 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1640 struct processor_costs atom_cost = {
1641 COSTS_N_INSNS (1), /* cost of an add instruction */
1642 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1643 COSTS_N_INSNS (1), /* variable shift costs */
1644 COSTS_N_INSNS (1), /* constant shift costs */
1645 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1646 COSTS_N_INSNS (4), /* HI */
1647 COSTS_N_INSNS (3), /* SI */
1648 COSTS_N_INSNS (4), /* DI */
1649 COSTS_N_INSNS (2)}, /* other */
1650 0, /* cost of multiply per each bit set */
1651 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1652 COSTS_N_INSNS (26), /* HI */
1653 COSTS_N_INSNS (42), /* SI */
1654 COSTS_N_INSNS (74), /* DI */
1655 COSTS_N_INSNS (74)}, /* other */
1656 COSTS_N_INSNS (1), /* cost of movsx */
1657 COSTS_N_INSNS (1), /* cost of movzx */
1658 8, /* "large" insn */
1659 17, /* MOVE_RATIO */
1660 4, /* cost for loading QImode using movzbl */
1661 {4, 4, 4}, /* cost of loading integer registers
1662 in QImode, HImode and SImode.
1663 Relative to reg-reg move (2). */
1664 {4, 4, 4}, /* cost of storing integer registers */
1665 4, /* cost of reg,reg fld/fst */
1666 {12, 12, 12}, /* cost of loading fp registers
1667 in SFmode, DFmode and XFmode */
1668 {6, 6, 8}, /* cost of storing fp registers
1669 in SFmode, DFmode and XFmode */
1670 2, /* cost of moving MMX register */
1671 {8, 8}, /* cost of loading MMX registers
1672 in SImode and DImode */
1673 {8, 8}, /* cost of storing MMX registers
1674 in SImode and DImode */
1675 2, /* cost of moving SSE register */
1676 {8, 8, 8}, /* cost of loading SSE registers
1677 in SImode, DImode and TImode */
1678 {8, 8, 8}, /* cost of storing SSE registers
1679 in SImode, DImode and TImode */
1680 5, /* MMX or SSE register to integer */
1681 32, /* size of l1 cache. */
1682 256, /* size of l2 cache. */
1683 64, /* size of prefetch block */
1684 6, /* number of parallel prefetches */
1685 3, /* Branch cost */
1686 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1687 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1688 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1689 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1690 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1691 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1694 1, /* scalar_stmt_cost. */
1695 1, /* scalar load_cost. */
1696 1, /* scalar_store_cost. */
1697 1, /* vec_stmt_cost. */
1698 1, /* vec_to_scalar_cost. */
1699 1, /* scalar_to_vec_cost. */
1700 1, /* vec_align_load_cost. */
1701 2, /* vec_unalign_load_cost. */
1702 1, /* vec_store_cost. */
1703 3, /* cond_taken_branch_cost. */
1704 1, /* cond_not_taken_branch_cost. */
/* memcpy expansion strategy table for the Silvermont cost model below.
   Entry [0] is presumably the 32-bit variant and [1] the 64-bit one --
   TODO(review): confirm against the stringop_algs consumers.  Each inner
   triple appears to be {max size, algorithm, noalign}; -1 = unbounded.  */
1707 static stringop_algs slm_memcpy[2] = {
1708 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1709 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1710 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* memset expansion strategy table for the Silvermont cost model below;
   same layout as slm_memcpy ([0]/[1] presumably 32/64-bit -- verify).  */
1711 static stringop_algs slm_memset[2] = {
1712 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1713 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1714 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1715 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Cost model referenced by the "silvermont" and "knl" rows of
   processor_target_table below.  Per-field meanings are given by the
   trailing comments; the fields from scalar_stmt_cost onward feed the
   vectorizer cost hooks.  */
1717 struct processor_costs slm_cost = {
1718 COSTS_N_INSNS (1), /* cost of an add instruction */
1719 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1720 COSTS_N_INSNS (1), /* variable shift costs */
1721 COSTS_N_INSNS (1), /* constant shift costs */
1722 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1723 COSTS_N_INSNS (3), /* HI */
1724 COSTS_N_INSNS (3), /* SI */
1725 COSTS_N_INSNS (4), /* DI */
1726 COSTS_N_INSNS (2)}, /* other */
1727 0, /* cost of multiply per each bit set */
1728 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1729 COSTS_N_INSNS (26), /* HI */
1730 COSTS_N_INSNS (42), /* SI */
1731 COSTS_N_INSNS (74), /* DI */
1732 COSTS_N_INSNS (74)}, /* other */
1733 COSTS_N_INSNS (1), /* cost of movsx */
1734 COSTS_N_INSNS (1), /* cost of movzx */
1735 8, /* "large" insn */
1736 17, /* MOVE_RATIO */
1737 4, /* cost for loading QImode using movzbl */
1738 {4, 4, 4}, /* cost of loading integer registers
1739 in QImode, HImode and SImode.
1740 Relative to reg-reg move (2). */
1741 {4, 4, 4}, /* cost of storing integer registers */
1742 4, /* cost of reg,reg fld/fst */
1743 {12, 12, 12}, /* cost of loading fp registers
1744 in SFmode, DFmode and XFmode */
1745 {6, 6, 8}, /* cost of storing fp registers
1746 in SFmode, DFmode and XFmode */
1747 2, /* cost of moving MMX register */
1748 {8, 8}, /* cost of loading MMX registers
1749 in SImode and DImode */
1750 {8, 8}, /* cost of storing MMX registers
1751 in SImode and DImode */
1752 2, /* cost of moving SSE register */
1753 {8, 8, 8}, /* cost of loading SSE registers
1754 in SImode, DImode and TImode */
1755 {8, 8, 8}, /* cost of storing SSE registers
1756 in SImode, DImode and TImode */
1757 5, /* MMX or SSE register to integer */
1758 32, /* size of l1 cache. */
1759 256, /* size of l2 cache. */
1760 64, /* size of prefetch block */
1761 6, /* number of parallel prefetches */
1762 3, /* Branch cost */
1763 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1764 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1765 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1766 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1767 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1768 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1771 1, /* scalar_stmt_cost. */
1772 1, /* scalar load_cost. */
1773 1, /* scalar_store_cost. */
1774 1, /* vec_stmt_cost. */
1775 4, /* vec_to_scalar_cost. */
1776 1, /* scalar_to_vec_cost. */
1777 1, /* vec_align_load_cost. */
1778 2, /* vec_unalign_load_cost. */
1779 1, /* vec_store_cost. */
1780 3, /* cond_taken_branch_cost. */
1781 1, /* cond_not_taken_branch_cost. */
/* memcpy expansion strategy table for the intel_cost model below
   (identical values to slm_memcpy; [0]/[1] presumably 32/64-bit --
   verify against the stringop_algs consumers).  */
1784 static stringop_algs intel_memcpy[2] = {
1785 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1786 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1787 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* memset expansion strategy table for the intel_cost model below
   (identical values to slm_memset).  */
1788 static stringop_algs intel_memset[2] = {
1789 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1790 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1791 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1792 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Cost model referenced by the "intel" row of processor_target_table
   below.  The visible values match slm_cost field-for-field; per-field
   meanings are given by the trailing comments.  */
1794 struct processor_costs intel_cost = {
1795 COSTS_N_INSNS (1), /* cost of an add instruction */
1796 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1797 COSTS_N_INSNS (1), /* variable shift costs */
1798 COSTS_N_INSNS (1), /* constant shift costs */
1799 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1800 COSTS_N_INSNS (3), /* HI */
1801 COSTS_N_INSNS (3), /* SI */
1802 COSTS_N_INSNS (4), /* DI */
1803 COSTS_N_INSNS (2)}, /* other */
1804 0, /* cost of multiply per each bit set */
1805 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1806 COSTS_N_INSNS (26), /* HI */
1807 COSTS_N_INSNS (42), /* SI */
1808 COSTS_N_INSNS (74), /* DI */
1809 COSTS_N_INSNS (74)}, /* other */
1810 COSTS_N_INSNS (1), /* cost of movsx */
1811 COSTS_N_INSNS (1), /* cost of movzx */
1812 8, /* "large" insn */
1813 17, /* MOVE_RATIO */
1814 4, /* cost for loading QImode using movzbl */
1815 {4, 4, 4}, /* cost of loading integer registers
1816 in QImode, HImode and SImode.
1817 Relative to reg-reg move (2). */
1818 {4, 4, 4}, /* cost of storing integer registers */
1819 4, /* cost of reg,reg fld/fst */
1820 {12, 12, 12}, /* cost of loading fp registers
1821 in SFmode, DFmode and XFmode */
1822 {6, 6, 8}, /* cost of storing fp registers
1823 in SFmode, DFmode and XFmode */
1824 2, /* cost of moving MMX register */
1825 {8, 8}, /* cost of loading MMX registers
1826 in SImode and DImode */
1827 {8, 8}, /* cost of storing MMX registers
1828 in SImode and DImode */
1829 2, /* cost of moving SSE register */
1830 {8, 8, 8}, /* cost of loading SSE registers
1831 in SImode, DImode and TImode */
1832 {8, 8, 8}, /* cost of storing SSE registers
1833 in SImode, DImode and TImode */
1834 5, /* MMX or SSE register to integer */
1835 32, /* size of l1 cache. */
1836 256, /* size of l2 cache. */
1837 64, /* size of prefetch block */
1838 6, /* number of parallel prefetches */
1839 3, /* Branch cost */
1840 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1841 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1842 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1843 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1844 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1845 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1848 1, /* scalar_stmt_cost. */
1849 1, /* scalar load_cost. */
1850 1, /* scalar_store_cost. */
1851 1, /* vec_stmt_cost. */
1852 4, /* vec_to_scalar_cost. */
1853 1, /* scalar_to_vec_cost. */
1854 1, /* vec_align_load_cost. */
1855 2, /* vec_unalign_load_cost. */
1856 1, /* vec_store_cost. */
1857 3, /* cond_taken_branch_cost. */
1858 1, /* cond_not_taken_branch_cost. */
1861 /* Generic should produce code tuned for Core-i7 (and newer chips)
1862 and btver1 (and newer chips). */
/* memcpy expansion strategy table for the generic_cost model below;
   [0]/[1] presumably 32/64-bit (rep_prefix_4_byte vs _8_byte) --
   TODO(review): confirm against the stringop_algs consumers.  */
1864 static stringop_algs generic_memcpy[2] = {
1865 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1866 {-1, libcall, false}}},
1867 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1868 {-1, libcall, false}}}};
/* memset expansion strategy table for the generic_cost model below;
   identical thresholds to generic_memcpy.  */
1869 static stringop_algs generic_memset[2] = {
1870 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1871 {-1, libcall, false}}},
1872 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1873 {-1, libcall, false}}}};
/* Cost model referenced by the "generic" row of processor_target_table
   below; per the comment at the top of this section it targets Core-i7
   and btver1 and newer chips.  */
1875 struct processor_costs generic_cost = {
1876 COSTS_N_INSNS (1), /* cost of an add instruction */
1877 /* On all chips taken into consideration lea is 2 cycles and more. With
1878 this cost however our current implementation of synth_mult results in
1879 use of unnecessary temporary registers causing regression on several
1880 SPECfp benchmarks. */
1881 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1882 COSTS_N_INSNS (1), /* variable shift costs */
1883 COSTS_N_INSNS (1), /* constant shift costs */
1884 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1885 COSTS_N_INSNS (4), /* HI */
1886 COSTS_N_INSNS (3), /* SI */
1887 COSTS_N_INSNS (4), /* DI */
1888 COSTS_N_INSNS (2)}, /* other */
1889 0, /* cost of multiply per each bit set */
1890 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1891 COSTS_N_INSNS (26), /* HI */
1892 COSTS_N_INSNS (42), /* SI */
1893 COSTS_N_INSNS (74), /* DI */
1894 COSTS_N_INSNS (74)}, /* other */
1895 COSTS_N_INSNS (1), /* cost of movsx */
1896 COSTS_N_INSNS (1), /* cost of movzx */
1897 8, /* "large" insn */
1898 17, /* MOVE_RATIO */
1899 4, /* cost for loading QImode using movzbl */
1900 {4, 4, 4}, /* cost of loading integer registers
1901 in QImode, HImode and SImode.
1902 Relative to reg-reg move (2). */
1903 {4, 4, 4}, /* cost of storing integer registers */
1904 4, /* cost of reg,reg fld/fst */
1905 {12, 12, 12}, /* cost of loading fp registers
1906 in SFmode, DFmode and XFmode */
1907 {6, 6, 8}, /* cost of storing fp registers
1908 in SFmode, DFmode and XFmode */
1909 2, /* cost of moving MMX register */
1910 {8, 8}, /* cost of loading MMX registers
1911 in SImode and DImode */
1912 {8, 8}, /* cost of storing MMX registers
1913 in SImode and DImode */
1914 2, /* cost of moving SSE register */
1915 {8, 8, 8}, /* cost of loading SSE registers
1916 in SImode, DImode and TImode */
1917 {8, 8, 8}, /* cost of storing SSE registers
1918 in SImode, DImode and TImode */
1919 5, /* MMX or SSE register to integer */
1920 32, /* size of l1 cache. */
1921 512, /* size of l2 cache. */
1922 64, /* size of prefetch block */
1923 6, /* number of parallel prefetches */
1924 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
1925 value is increased to perhaps more appropriate value of 5. */
1926 3, /* Branch cost */
1927 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1928 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1929 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1930 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1931 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1932 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1935 1, /* scalar_stmt_cost. */
1936 1, /* scalar load_cost. */
1937 1, /* scalar_store_cost. */
1938 1, /* vec_stmt_cost. */
1939 1, /* vec_to_scalar_cost. */
1940 1, /* scalar_to_vec_cost. */
1941 1, /* vec_align_load_cost. */
1942 2, /* vec_unalign_load_cost. */
1943 1, /* vec_store_cost. */
1944 3, /* cond_taken_branch_cost. */
1945 1, /* cond_not_taken_branch_cost. */
1948 /* core_cost should produce code tuned for the Core family of CPUs. */
/* memcpy expansion strategy table for the core_cost model below.  Note
   the third field is true here (unlike the slm/intel/generic tables) --
   presumably a "noalign" flag; verify against the stringop_algs users.  */
1949 static stringop_algs core_memcpy[2] = {
1950 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1951 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1952 {-1, libcall, false}}}};
/* memset expansion strategy table for the core_cost model below;
   same layout and flag convention as core_memcpy.  */
1953 static stringop_algs core_memset[2] = {
1954 {libcall, {{6, loop_1_byte, true},
1956 {8192, rep_prefix_4_byte, true},
1957 {-1, libcall, false}}},
1958 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1959 {-1, libcall, false}}}};
/* Cost model referenced by the "core2", "nehalem", "sandybridge" and
   "haswell" rows of processor_target_table below.  */
1962 struct processor_costs core_cost = {
1963 COSTS_N_INSNS (1), /* cost of an add instruction */
1964 /* On all chips taken into consideration lea is 2 cycles and more. With
1965 this cost however our current implementation of synth_mult results in
1966 use of unnecessary temporary registers causing regression on several
1967 SPECfp benchmarks. */
1968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1969 COSTS_N_INSNS (1), /* variable shift costs */
1970 COSTS_N_INSNS (1), /* constant shift costs */
1971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1972 COSTS_N_INSNS (4), /* HI */
1973 COSTS_N_INSNS (3), /* SI */
1974 COSTS_N_INSNS (4), /* DI */
1975 COSTS_N_INSNS (2)}, /* other */
1976 0, /* cost of multiply per each bit set */
1977 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1978 COSTS_N_INSNS (26), /* HI */
1979 COSTS_N_INSNS (42), /* SI */
1980 COSTS_N_INSNS (74), /* DI */
1981 COSTS_N_INSNS (74)}, /* other */
1982 COSTS_N_INSNS (1), /* cost of movsx */
1983 COSTS_N_INSNS (1), /* cost of movzx */
1984 8, /* "large" insn */
1985 17, /* MOVE_RATIO */
1986 4, /* cost for loading QImode using movzbl */
1987 {4, 4, 4}, /* cost of loading integer registers
1988 in QImode, HImode and SImode.
1989 Relative to reg-reg move (2). */
1990 {4, 4, 4}, /* cost of storing integer registers */
1991 4, /* cost of reg,reg fld/fst */
1992 {12, 12, 12}, /* cost of loading fp registers
1993 in SFmode, DFmode and XFmode */
1994 {6, 6, 8}, /* cost of storing fp registers
1995 in SFmode, DFmode and XFmode */
1996 2, /* cost of moving MMX register */
1997 {8, 8}, /* cost of loading MMX registers
1998 in SImode and DImode */
1999 {8, 8}, /* cost of storing MMX registers
2000 in SImode and DImode */
2001 2, /* cost of moving SSE register */
2002 {8, 8, 8}, /* cost of loading SSE registers
2003 in SImode, DImode and TImode */
2004 {8, 8, 8}, /* cost of storing SSE registers
2005 in SImode, DImode and TImode */
2006 5, /* MMX or SSE register to integer */
2007 64, /* size of l1 cache. */
2008 512, /* size of l2 cache. */
2009 64, /* size of prefetch block */
2010 6, /* number of parallel prefetches */
2011 /* FIXME perhaps more appropriate value is 5. */
2012 3, /* Branch cost */
2013 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2014 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2015 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2016 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2017 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2018 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2021 1, /* scalar_stmt_cost. */
2022 1, /* scalar load_cost. */
2023 1, /* scalar_store_cost. */
2024 1, /* vec_stmt_cost. */
2025 1, /* vec_to_scalar_cost. */
2026 1, /* scalar_to_vec_cost. */
2027 1, /* vec_align_load_cost. */
2028 2, /* vec_unalign_load_cost. */
2029 1, /* vec_store_cost. */
2030 3, /* cond_taken_branch_cost. */
2031 1, /* cond_not_taken_branch_cost. */
/* Active cost-model pointers.  Both default to pentium_cost here;
   presumably re-pointed at one of the tables above during option
   processing (NOTE(review): the overriding code is outside this view --
   confirm).  */
2035 /* Set by -mtune. */
2036 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2038 /* Set by -mtune or -Os. */
2039 const struct processor_costs *ix86_cost = &pentium_cost;
2041 /* Processor feature/optimization bitmasks. */
/* One bit per PROCESSOR_* enumerator; compound masks (m_P4_NOCONA,
   m_CORE_ALL, ...) group related processors.  Intel processors first.  */
2042 #define m_386 (1<<PROCESSOR_I386)
2043 #define m_486 (1<<PROCESSOR_I486)
2044 #define m_PENT (1<<PROCESSOR_PENTIUM)
2045 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2046 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2047 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2048 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2049 #define m_CORE2 (1<<PROCESSOR_CORE2)
2050 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2051 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2052 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2053 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2054 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2055 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2056 #define m_KNL (1<<PROCESSOR_KNL)
2057 #define m_INTEL (1<<PROCESSOR_INTEL)
/* AMD processors (grouped into m_AMD_MULTIPLE below).  */
2059 #define m_GEODE (1<<PROCESSOR_GEODE)
2060 #define m_K6 (1<<PROCESSOR_K6)
2061 #define m_K6_GEODE (m_K6 | m_GEODE)
2062 #define m_K8 (1<<PROCESSOR_K8)
2063 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2064 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2065 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2066 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2067 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2068 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2069 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2070 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2071 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2072 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2073 #define m_BTVER (m_BTVER1 | m_BTVER2)
2074 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2076 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2078 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2080 #define DEF_TUNE(tune, name, selector) name,
2081 #include "x86-tune.def"
2085 /* Feature tests against the various tunings. */
2086 unsigned char ix86_tune_features[X86_TUNE_LAST];
2088 /* Feature tests against the various tunings used to create ix86_tune_features
2089 based on the processor mask. */
2090 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2092 #define DEF_TUNE(tune, name, selector) selector,
2093 #include "x86-tune.def"
2097 /* Feature tests against the various architecture variations. */
2098 unsigned char ix86_arch_features[X86_ARCH_LAST];
2100 /* Feature tests against the various architecture variations, used to create
2101 ix86_arch_features based on the processor mask. */
2102 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2103 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2104 ~(m_386 | m_486 | m_PENT | m_K6),
2106 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2109 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2112 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2115 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2119 /* In case the average insn count for single function invocation is
2120 lower than this constant, emit fast (but longer) prologue and
2122 #define FAST_PROLOGUE_INSN_COUNT 20
2124 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2125 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2126 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2127 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2129 /* Array of the smallest class containing reg number REGNO, indexed by
2130 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2132 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2134 /* ax, dx, cx, bx */
2135 AREG, DREG, CREG, BREG,
2136 /* si, di, bp, sp */
2137 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2139 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2140 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2143 /* flags, fpsr, fpcr, frame */
2144 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2146 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2149 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2152 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2153 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2154 /* SSE REX registers */
2155 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2157 /* AVX-512 SSE registers */
2158 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2159 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2160 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2161 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2162 /* Mask registers. */
2163 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2164 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2165 /* MPX bound registers */
2166 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2169 /* The "default" register map used in 32bit mode. */
2171 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2173 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2174 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2175 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2176 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2177 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2178 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2179 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2180 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2181 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2182 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2183 101, 102, 103, 104, /* bound registers */
2186 /* The "default" register map used in 64bit mode. */
2188 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2190 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2191 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2192 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2193 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2194 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2195 8,9,10,11,12,13,14,15, /* extended integer registers */
2196 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2197 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2198 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2199 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2200 126, 127, 128, 129, /* bound registers */
2203 /* Define the register numbers to be used in Dwarf debugging information.
2204 The SVR4 reference port C compiler uses the following register numbers
2205 in its Dwarf output code:
2206 0 for %eax (gcc regno = 0)
2207 1 for %ecx (gcc regno = 2)
2208 2 for %edx (gcc regno = 1)
2209 3 for %ebx (gcc regno = 3)
2210 4 for %esp (gcc regno = 7)
2211 5 for %ebp (gcc regno = 6)
2212 6 for %esi (gcc regno = 4)
2213 7 for %edi (gcc regno = 5)
2214 The following three DWARF register numbers are never generated by
2215 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2216 believes these numbers have these meanings.
2217 8 for %eip (no gcc equivalent)
2218 9 for %eflags (gcc regno = 17)
2219 10 for %trapno (no gcc equivalent)
2220 It is not at all clear how we should number the FP stack registers
2221 for the x86 architecture. If the version of SDB on x86/svr4 were
2222 a bit less brain dead with respect to floating-point then we would
2223 have a precedent to follow with respect to DWARF register numbers
2224 for x86 FP registers, but the SDB on x86/svr4 is so completely
2225 broken with respect to FP registers that it is hardly worth thinking
2226 of it as something to strive for compatibility with.
2227 The version of x86/svr4 SDB I have at the moment does (partially)
2228 seem to believe that DWARF register number 11 is associated with
2229 the x86 register %st(0), but that's about all. Higher DWARF
2230 register numbers don't seem to be associated with anything in
2231 particular, and even for DWARF regno 11, SDB only seems to under-
2232 stand that it should say that a variable lives in %st(0) (when
2233 asked via an `=' command) if we said it was in DWARF regno 11,
2234 but SDB still prints garbage when asked for the value of the
2235 variable in question (via a `/' command).
2236 (Also note that the labels SDB prints for various FP stack regs
2237 when doing an `x' command are all wrong.)
2238 Note that these problems generally don't affect the native SVR4
2239 C compiler because it doesn't allow the use of -O with -g and
2240 because when it is *not* optimizing, it allocates a memory
2241 location for each floating-point variable, and the memory
2242 location is what gets described in the DWARF AT_location
2243 attribute for the variable in question.
2244 Regardless of the severe mental illness of the x86/svr4 SDB, we
2245 do something sensible here and we use the following DWARF
2246 register numbers. Note that these are all stack-top-relative
2248 11 for %st(0) (gcc regno = 8)
2249 12 for %st(1) (gcc regno = 9)
2250 13 for %st(2) (gcc regno = 10)
2251 14 for %st(3) (gcc regno = 11)
2252 15 for %st(4) (gcc regno = 12)
2253 16 for %st(5) (gcc regno = 13)
2254 17 for %st(6) (gcc regno = 14)
2255 18 for %st(7) (gcc regno = 15)
2257 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2259 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2260 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2261 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2262 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2263 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2264 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2265 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2266 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2267 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2268 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2269 101, 102, 103, 104, /* bound registers */
2272 /* Define parameter passing and return registers. */
2274 static int const x86_64_int_parameter_registers[6] =
2276 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2279 static int const x86_64_ms_abi_int_parameter_registers[4] =
2281 CX_REG, DX_REG, R8_REG, R9_REG
2284 static int const x86_64_int_return_registers[4] =
2286 AX_REG, DX_REG, DI_REG, SI_REG
2289 /* Additional registers that are clobbered by SYSV calls. */
2291 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2295 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2296 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2299 /* Define the structure for the machine field in struct function. */
2301 struct GTY(()) stack_local_entry {
2302 unsigned short mode;
2305 struct stack_local_entry *next;
2308 /* Structure describing stack frame layout.
2309 Stack grows downward:
2315 saved static chain if ix86_static_chain_on_stack
2317 saved frame pointer if frame_pointer_needed
2318 <- HARD_FRAME_POINTER
2324 <- sse_regs_save_offset
2327 [va_arg registers] |
2331 [padding2] | = to_allocate
2340 int outgoing_arguments_size;
2342 /* The offsets relative to ARG_POINTER. */
2343 HOST_WIDE_INT frame_pointer_offset;
2344 HOST_WIDE_INT hard_frame_pointer_offset;
2345 HOST_WIDE_INT stack_pointer_offset;
2346 HOST_WIDE_INT hfp_save_offset;
2347 HOST_WIDE_INT reg_save_offset;
2348 HOST_WIDE_INT sse_reg_save_offset;
2350 /* When save_regs_using_mov is set, emit prologue using
2351 move instead of push instructions. */
2352 bool save_regs_using_mov;
2355 /* Which cpu are we scheduling for. */
2356 enum attr_cpu ix86_schedule;
2358 /* Which cpu are we optimizing for. */
2359 enum processor_type ix86_tune;
2361 /* Which instruction set architecture to use. */
2362 enum processor_type ix86_arch;
2364 /* True if processor has SSE prefetch instruction. */
2365 unsigned char x86_prefetch_sse;
2367 /* -mstackrealign option */
2368 static const char ix86_force_align_arg_pointer_string[]
2369 = "force_align_arg_pointer";
2371 static rtx (*ix86_gen_leave) (void);
2372 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2373 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2374 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2375 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2376 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2377 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2378 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2379 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2380 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2381 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2382 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2384 /* Preferred alignment for stack boundary in bits. */
2385 unsigned int ix86_preferred_stack_boundary;
2387 /* Alignment for incoming stack boundary in bits specified at
2389 static unsigned int ix86_user_incoming_stack_boundary;
2391 /* Default alignment for incoming stack boundary in bits. */
2392 static unsigned int ix86_default_incoming_stack_boundary;
2394 /* Alignment for incoming stack boundary in bits. */
2395 unsigned int ix86_incoming_stack_boundary;
2397 /* Calling abi specific va_list type nodes. */
2398 static GTY(()) tree sysv_va_list_type_node;
2399 static GTY(()) tree ms_va_list_type_node;
2401 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2402 char internal_label_prefix[16];
2403 int internal_label_prefix_len;
2405 /* Fence to use after loop using movnt. */
2408 /* Register class used for passing given 64bit part of the argument.
2409 These represent classes as documented by the PS ABI, with the exception
2410 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
2411 use SF or DFmode move instead of DImode to avoid reformatting penalties.
2413 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2414 whenever possible (upper half does contain padding). */
2415 enum x86_64_reg_class
2418 X86_64_INTEGER_CLASS,
2419 X86_64_INTEGERSI_CLASS,
2426 X86_64_COMPLEX_X87_CLASS,
2430 #define MAX_CLASSES 8
2432 /* Table of constants used by fldpi, fldln2, etc.... */
2433 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2434 static bool ext_80387_constants_init = 0;
2437 static struct machine_function * ix86_init_machine_status (void);
2438 static rtx ix86_function_value (const_tree, const_tree, bool);
2439 static bool ix86_function_value_regno_p (const unsigned int);
2440 static unsigned int ix86_function_arg_boundary (machine_mode,
2442 static rtx ix86_static_chain (const_tree, bool);
2443 static int ix86_function_regparm (const_tree, const_tree);
2444 static void ix86_compute_frame_layout (struct ix86_frame *);
2445 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2447 static void ix86_add_new_builtins (HOST_WIDE_INT);
2448 static tree ix86_canonical_va_list_type (tree);
2449 static void predict_jump (int);
2450 static unsigned int split_stack_prologue_scratch_regno (void);
2451 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2453 enum ix86_function_specific_strings
2455 IX86_FUNCTION_SPECIFIC_ARCH,
2456 IX86_FUNCTION_SPECIFIC_TUNE,
2457 IX86_FUNCTION_SPECIFIC_MAX
2460 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2461 const char *, enum fpmath_unit, bool);
2462 static void ix86_function_specific_save (struct cl_target_option *,
2463 struct gcc_options *opts);
2464 static void ix86_function_specific_restore (struct gcc_options *opts,
2465 struct cl_target_option *);
2466 static void ix86_function_specific_post_stream_in (struct cl_target_option *);
2467 static void ix86_function_specific_print (FILE *, int,
2468 struct cl_target_option *);
2469 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2470 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2471 struct gcc_options *,
2472 struct gcc_options *,
2473 struct gcc_options *);
2474 static bool ix86_can_inline_p (tree, tree);
2475 static void ix86_set_current_function (tree);
2476 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2478 static enum calling_abi ix86_function_abi (const_tree);
2481 #ifndef SUBTARGET32_DEFAULT_CPU
2482 #define SUBTARGET32_DEFAULT_CPU "i386"
2485 /* Whether -mtune= or -march= were specified */
2486 static int ix86_tune_defaulted;
2487 static int ix86_arch_specified;
2489 /* Vectorization library interface and handlers. */
2490 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2492 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2493 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2495 /* Processor target table, indexed by processor number */
2498 const char *const name; /* processor name */
2499 const struct processor_costs *cost; /* Processor costs */
2500 const int align_loop; /* Default alignments. */
2501 const int align_loop_max_skip;
2502 const int align_jump;
2503 const int align_jump_max_skip;
2504 const int align_func;
2507 /* This table must be in sync with enum processor_type in i386.h. */
2508 static const struct ptt processor_target_table[PROCESSOR_max] =
2510 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2511 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2512 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2513 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2514 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2515 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2516 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2517 {"core2", &core_cost, 16, 10, 16, 10, 16},
2518 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2519 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2520 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2521 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2522 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2523 {"knl", &slm_cost, 16, 15, 16, 7, 16},
2524 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2525 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2526 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2527 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2528 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2529 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2530 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2531 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2532 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2533 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2534 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2535 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
/* Worker for the vzeroupper insertion pass: disables every mode-switching
   entity except AVX_U128, then reruns the generic mode-switching pass so
   only AVX upper-half state transitions are processed.  NOTE(review): the
   function's return type, local declarations and return statement are
   elided from this excerpt.  */
2539 rest_of_handle_insert_vzeroupper (void)
2543 /* vzeroupper instructions are inserted immediately after reload to
2544 account for possible spills from 256bit registers. The pass
2545 reuses mode switching infrastructure by re-running mode insertion
2546 pass, so disable entities that have already been processed. */
2547 for (i = 0; i < MAX_386_ENTITIES; i++)
2548 ix86_optimize_mode_switching[i] = 0;
2550 ix86_optimize_mode_switching[AVX_U128] = 1;
2552 /* Call optimize_mode_switching. */
2553 g->get_passes ()->execute_pass_mode_switching ();
/* Metadata for the "vzeroupper" RTL pass: no timevar, no property or
   todo requirements beyond finishing dataflow (TODO_df_finish).  */
2559 const pass_data pass_data_insert_vzeroupper =
2561 RTL_PASS, /* type */
2562 "vzeroupper", /* name */
2563 OPTGROUP_NONE, /* optinfo_flags */
2564 TV_NONE, /* tv_id */
2565 0, /* properties_required */
2566 0, /* properties_provided */
2567 0, /* properties_destroyed */
2568 0, /* todo_flags_start */
2569 TODO_df_finish, /* todo_flags_finish */
/* RTL pass that inserts vzeroupper instructions (see
   rest_of_handle_insert_vzeroupper).  Gated on AVX without AVX512F,
   -mvzeroupper, and -fexpensive-optimizations; NOTE(review): the gate
   condition visibly continues past line 2583 — its final conjunct is
   elided from this excerpt.  */
2572 class pass_insert_vzeroupper : public rtl_opt_pass
2575 pass_insert_vzeroupper(gcc::context *ctxt)
2576 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2579 /* opt_pass methods: */
2580 virtual bool gate (function *)
2582 return TARGET_AVX && !TARGET_AVX512F
2583 && TARGET_VZEROUPPER && flag_expensive_optimizations
2587 virtual unsigned int execute (function *)
2589 return rest_of_handle_insert_vzeroupper ();
2592 }; // class pass_insert_vzeroupper
/* Factory used by the pass manager to instantiate the vzeroupper pass.  */
2597 make_pass_insert_vzeroupper (gcc::context *ctxt)
2599 return new pass_insert_vzeroupper (ctxt);
2602 /* Return true if a red-zone is in use. */
/* The red zone is available under -mred-zone except for the 64-bit
   Microsoft ABI, which does not define one.  */
2605 ix86_using_red_zone (void)
2607 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2610 /* Return a string that documents the current -m options. The caller is
2611 responsible for freeing the string. */
/* Builds the string from: -march=/-mtune=, the ABI switch, each ISA mask
   bit matched against isa_opts[], each target_flags bit matched against
   flag_opts[], and -mfpmath=.  Unrecognized leftover bits are reported
   as "(other isa: ...)" / "(other flags: ...)" when add_nl_p is set.
   NOTE(review): several parameters and locals (add_nl_p, abi, num, len,
   sep, etc.) are declared on lines elided from this excerpt.  */
2614 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2615 const char *tune, enum fpmath_unit fpmath,
2618 struct ix86_target_opts
2620 const char *option; /* option string */
2621 HOST_WIDE_INT mask; /* isa mask options */
2624 /* This table is ordered so that options like -msse4.2 that imply
2625 preceding options while match those first. */
2626 static struct ix86_target_opts isa_opts[] =
2628 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2629 { "-mfma", OPTION_MASK_ISA_FMA },
2630 { "-mxop", OPTION_MASK_ISA_XOP },
2631 { "-mlwp", OPTION_MASK_ISA_LWP },
2632 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2633 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2634 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2635 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2636 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2637 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2638 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2639 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
2640 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
2641 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2642 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2643 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2644 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2645 { "-msse3", OPTION_MASK_ISA_SSE3 },
2646 { "-msse2", OPTION_MASK_ISA_SSE2 },
2647 { "-msse", OPTION_MASK_ISA_SSE },
2648 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2649 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2650 { "-mmmx", OPTION_MASK_ISA_MMX },
2651 { "-mabm", OPTION_MASK_ISA_ABM },
2652 { "-mbmi", OPTION_MASK_ISA_BMI },
2653 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2654 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2655 { "-mhle", OPTION_MASK_ISA_HLE },
2656 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2657 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2658 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2659 { "-madx", OPTION_MASK_ISA_ADX },
2660 { "-mtbm", OPTION_MASK_ISA_TBM },
2661 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2662 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2663 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2664 { "-maes", OPTION_MASK_ISA_AES },
2665 { "-msha", OPTION_MASK_ISA_SHA },
2666 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2667 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2668 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2669 { "-mf16c", OPTION_MASK_ISA_F16C },
2670 { "-mrtm", OPTION_MASK_ISA_RTM },
2671 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2672 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2673 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2674 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2675 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2676 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2677 { "-mmpx", OPTION_MASK_ISA_MPX },
2678 { "-mclwb", OPTION_MASK_ISA_CLWB },
2679 { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
/* Non-ISA target flags, matched against the FLAGS argument.  */
2683 static struct ix86_target_opts flag_opts[] =
2685 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2686 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2687 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2688 { "-m80387", MASK_80387 },
2689 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2690 { "-malign-double", MASK_ALIGN_DOUBLE },
2691 { "-mcld", MASK_CLD },
2692 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2693 { "-mieee-fp", MASK_IEEE_FP },
2694 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2695 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2696 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2697 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2698 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2699 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2700 { "-mno-red-zone", MASK_NO_RED_ZONE },
2701 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2702 { "-mrecip", MASK_RECIP },
2703 { "-mrtd", MASK_RTD },
2704 { "-msseregparm", MASK_SSEREGPARM },
2705 { "-mstack-arg-probe", MASK_STACK_PROBE },
2706 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2707 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2708 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2709 { "-mvzeroupper", MASK_VZEROUPPER },
2710 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2711 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2712 { "-mprefer-avx128", MASK_PREFER_AVX128},
/* opts[][0] is the option name, opts[][1] an optional argument; the +6
   slack covers -march/-mtune/ABI/-mfpmath/"other" entries.  */
2715 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2718 char target_other[40];
2728 memset (opts, '\0', sizeof (opts));
2730 /* Add -march= option. */
2733 opts[num][0] = "-march=";
2734 opts[num++][1] = arch;
2737 /* Add -mtune= option. */
2740 opts[num][0] = "-mtune=";
2741 opts[num++][1] = tune;
2744 /* Add -m32/-m64/-mx32. */
2745 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2747 if ((isa & OPTION_MASK_ABI_64) != 0)
/* The ABI bits are consumed here so they do not show up again below.  */
2751 isa &= ~ (OPTION_MASK_ISA_64BIT
2752 | OPTION_MASK_ABI_64
2753 | OPTION_MASK_ABI_X32);
2757 opts[num++][0] = abi;
2759 /* Pick out the options in isa options. */
2760 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2762 if ((isa & isa_opts[i].mask) != 0)
2764 opts[num++][0] = isa_opts[i].option;
2765 isa &= ~ isa_opts[i].mask;
/* Any ISA bits not matched above are reported numerically.  */
2769 if (isa && add_nl_p)
2771 opts[num++][0] = isa_other;
2772 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2776 /* Add flag options. */
2777 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2779 if ((flags & flag_opts[i].mask) != 0)
2781 opts[num++][0] = flag_opts[i].option;
2782 flags &= ~ flag_opts[i].mask;
2786 if (flags && add_nl_p)
2788 opts[num++][0] = target_other;
2789 sprintf (target_other, "(other flags: %#x)", flags);
2792 /* Add -fpmath= option. */
2795 opts[num][0] = "-mfpmath=";
2796 switch ((int) fpmath)
2799 opts[num++][1] = "387";
2803 opts[num++][1] = "sse";
2806 case FPMATH_387 | FPMATH_SSE:
2807 opts[num++][1] = "sse+387";
2819 gcc_assert (num < ARRAY_SIZE (opts));
2821 /* Size the string. */
/* sep_len: separator width — " \\\n" (3 chars) when wrapping, " " (1).  */
2823 sep_len = (add_nl_p) ? 3 : 1;
2824 for (i = 0; i < num; i++)
2827 for (j = 0; j < 2; j++)
2829 len += strlen (opts[i][j]);
2832 /* Build the string. */
2833 ret = ptr = (char *) xmalloc (len);
2836 for (i = 0; i < num; i++)
2840 for (j = 0; j < 2; j++)
2841 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
/* Wrap the output line at ~70 columns when newlines are requested.  */
2848 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2856 for (j = 0; j < 2; j++)
2859 memcpy (ptr, opts[i][j], len2[j]);
2861 line_len += len2[j];
/* Sanity-check that we did not overrun the xmalloc'd buffer.  */
2866 gcc_assert (ret + len >= ptr);
2871 /* Return true, if profiling code should be emitted before
2872 prologue. Otherwise it returns false.
2873 Note: For x86 with "hotfix" it is sorried. */
/* True exactly when -mfentry is in effect (profiling call goes first).  */
2875 ix86_profile_before_prologue (void)
2877 return flag_fentry != 0;
2880 /* Function that is callable from the debugger to print the current
/* Formats the active ISA/target flags via ix86_target_string and prints
   them to stderr; prints "<no options>" when the string is empty.
   NOTE(review): the free of the returned string is on an elided line.  */
2882 void ATTRIBUTE_UNUSED
2883 ix86_debug_options (void)
2885 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2886 ix86_arch_string, ix86_tune_string,
2891 fprintf (stderr, "%s\n\n", opts);
2895 fputs ("<no options>\n\n", stderr);
/* Names of the string-operation algorithms, generated from stringop.def
   so they stay in sync with enum stringop_alg.  */
2900 static const char *stringop_alg_names[] = {
2902 #define DEF_ALG(alg, name) #name,
2903 #include "stringop.def"
2908 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2909 The string is of the following form (or comma separated list of it):
2911 strategy_alg:max_size:[align|noalign]
2913 where the full size range for the strategy is either [0, max_size] or
2914 [min_size, max_size], in which min_size is the max_size + 1 of the
2915 preceding range. The last size range must have max_size == -1.
2920 -mmemcpy-strategy=libcall:-1:noalign
2922 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2926 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2928 This is to tell the compiler to use the following strategy for memset
2929 1) when the expected size is between [1, 16], use rep_8byte strategy;
2930 2) when the size is between [17, 2048], use vector_loop;
2931 3) when the size is > 2048, use libcall. */
2933 struct stringop_size_range
/* Validates each comma-separated range, then overwrites the (nominally
   const) default cost-table entries in place via const_cast.  Errors are
   reported with error() and abort the parse.  NOTE(review): locals such as
   n, i, maxs, alg_name[21] and align[11] are declared on elided lines.  */
2941 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2943 const struct stringop_algs *default_algs;
2944 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2945 char *curr_range_str, *next_range_str;
/* Pick the 32- or 64-bit memset/memcpy default table to override.  */
2949 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2951 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2953 curr_range_str = strategy_str;
/* Split on ',' in place, one strategy_alg:max_size:align triple at a time.  */
2960 next_range_str = strchr (curr_range_str, ',');
2962 *next_range_str++ = '\0';
2964 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2965 alg_name, &maxs, align))
2967 error ("wrong arg %s to option %s", curr_range_str,
2968 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
/* Ranges must be strictly increasing except the final -1 sentinel.  */
2972 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2974 error ("size ranges of option %s should be increasing",
2975 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
/* Translate the algorithm name into its enum stringop_alg value.  */
2979 for (i = 0; i < last_alg; i++)
2980 if (!strcmp (alg_name, stringop_alg_names[i]))
2985 error ("wrong stringop strategy name %s specified for option %s",
2987 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2991 input_ranges[n].max = maxs;
2992 input_ranges[n].alg = (stringop_alg) i;
2993 if (!strcmp (align, "align"))
2994 input_ranges[n].noalign = false;
2995 else if (!strcmp (align, "noalign"))
2996 input_ranges[n].noalign = true;
2999 error ("unknown alignment %s specified for option %s",
3000 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3004 curr_range_str = next_range_str;
3006 while (curr_range_str);
3008 if (input_ranges[n - 1].max != -1)
3010 error ("the max value for the last size range should be -1"
3012 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3016 if (n > MAX_STRINGOP_ALGS)
3018 error ("too many size ranges specified in option %s",
3019 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3023 /* Now override the default algs array. */
3024 for (i = 0; i < n; i++)
3026 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3027 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3028 = input_ranges[i].alg;
3029 *const_cast<int *>(&default_algs->size[i].noalign)
3030 = input_ranges[i].noalign;
3035 /* parse -mtune-ctrl= option. When DUMP is true,
3036 print the features that are explicitly set. */
/* Each comma-separated token names an entry of ix86_tune_feature_names;
   a leading '^' clears the feature instead of setting it.  Operates on an
   xstrdup'd copy so the tokens can be NUL-split in place.  NOTE(review):
   the free of ORIG and the `clear` local's declaration are elided.  */
3039 parse_mtune_ctrl_str (bool dump)
3041 if (!ix86_tune_ctrl_string)
3044 char *next_feature_string = NULL;
3045 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3046 char *orig = curr_feature_string;
3052 next_feature_string = strchr (curr_feature_string, ',');
3053 if (next_feature_string)
3054 *next_feature_string++ = '\0';
3055 if (*curr_feature_string == '^')
3057 curr_feature_string++;
3060 for (i = 0; i < X86_TUNE_LAST; i++)
3062 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3064 ix86_tune_features[i] = !clear;
3066 fprintf (stderr, "Explicitly %s feature %s\n",
3067 clear ? "clear" : "set", ix86_tune_feature_names[i]);
/* i == X86_TUNE_LAST means the name matched nothing; report it,
   backing up one char to include the stripped '^' in the message.  */
3071 if (i == X86_TUNE_LAST)
3072 error ("Unknown parameter to option -mtune-ctrl: %s",
3073 clear ? curr_feature_string - 1 : curr_feature_string);
3074 curr_feature_string = next_feature_string;
3076 while (curr_feature_string);
3080 /* Helper function to set ix86_tune_features. IX86_TUNE is the
/* Initializes every tuning feature from initial_ix86_tune_features,
   testing the bit for the selected processor; -mtune-ctrl without a
   default (ix86_tune_no_default) starts from all-zero instead.  Explicit
   -mtune-ctrl overrides are applied last via parse_mtune_ctrl_str.  */
3084 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3086 unsigned int ix86_tune_mask = 1u << ix86_tune;
3089 for (i = 0; i < X86_TUNE_LAST; ++i)
3091 if (ix86_tune_no_default)
3092 ix86_tune_features[i] = 0;
3094 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3099 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3100 for (i = 0; i < X86_TUNE_LAST; i++)
3101 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3102 ix86_tune_features[i] ? "on" : "off");
3105 parse_mtune_ctrl_str (dump);
3109 /* Override various settings based on options. If MAIN_ARGS_P, the
3110 options are from the command line, otherwise they are from
3114 ix86_option_override_internal (bool main_args_p,
3115 struct gcc_options *opts,
3116 struct gcc_options *opts_set)
3119 unsigned int ix86_arch_mask;
3120 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3125 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3126 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3127 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3128 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3129 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3130 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3131 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3132 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3133 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3134 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3135 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3136 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3137 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3138 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3139 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3140 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3141 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3142 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3143 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3144 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3145 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3146 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3147 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3148 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3149 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3150 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3151 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3152 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3153 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3154 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3155 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3156 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3157 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3158 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3159 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3160 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3161 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3162 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3163 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3164 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3165 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3166 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3167 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3168 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3169 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
3170 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3171 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3172 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3173 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3174 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3175 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3176 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3177 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3178 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
3179 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
3180 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
3181 #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
3184 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3185 | PTA_CX16 | PTA_FXSR)
3186 #define PTA_NEHALEM \
3187 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3188 #define PTA_WESTMERE \
3189 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3190 #define PTA_SANDYBRIDGE \
3191 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3192 #define PTA_IVYBRIDGE \
3193 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3194 #define PTA_HASWELL \
3195 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3196 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3197 #define PTA_BROADWELL \
3198 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3200 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
3201 #define PTA_BONNELL \
3202 (PTA_CORE2 | PTA_MOVBE)
3203 #define PTA_SILVERMONT \
3204 (PTA_WESTMERE | PTA_MOVBE)
3206 /* if this reaches 64, need to widen struct pta flags below */
3210 const char *const name; /* processor name or nickname. */
3211 const enum processor_type processor;
3212 const enum attr_cpu schedule;
3213 const unsigned HOST_WIDE_INT flags;
3215 const processor_alias_table[] =
3217 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3218 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3219 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3220 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3221 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3222 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3223 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3224 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3225 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3226 PTA_MMX | PTA_SSE | PTA_FXSR},
3227 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3228 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3229 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3230 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3231 PTA_MMX | PTA_SSE | PTA_FXSR},
3232 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3233 PTA_MMX | PTA_SSE | PTA_FXSR},
3234 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3235 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3236 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3237 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3238 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3239 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3240 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3241 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3242 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3243 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3244 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3245 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3246 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3247 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3248 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3249 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3251 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3253 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3255 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3257 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3258 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3259 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3260 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3261 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3262 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3263 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3264 {"knl", PROCESSOR_KNL, CPU_KNL, PTA_KNL},
3265 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3266 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3267 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3268 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3269 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3270 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3271 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3272 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3273 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3274 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3275 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3276 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3277 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3278 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3279 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3280 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3281 {"x86-64", PROCESSOR_K8, CPU_K8,
3282 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3283 {"k8", PROCESSOR_K8, CPU_K8,
3284 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3285 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3286 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3287 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3288 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3289 {"opteron", PROCESSOR_K8, CPU_K8,
3290 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3291 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3292 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3293 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3294 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3295 {"athlon64", PROCESSOR_K8, CPU_K8,
3296 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3297 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3298 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3299 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3300 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3301 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3302 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3303 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3304 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3305 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3306 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3307 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3308 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3309 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3310 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3311 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3312 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3313 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3314 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3315 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3316 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3317 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3318 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3319 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3320 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3321 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3322 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3323 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3324 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3325 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3326 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3327 | PTA_XSAVEOPT | PTA_FSGSBASE},
3328 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3329 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3330 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3331 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3332 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3333 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3334 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3336 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3337 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3338 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3339 | PTA_FXSR | PTA_XSAVE},
3340 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3341 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3342 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3343 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3344 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3345 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3347 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3349 | PTA_HLE /* flags are only used for -march switch. */ },
3352 /* -mrecip options. */
3355 const char *string; /* option name */
3356 unsigned int mask; /* mask bits to set */
3358 const recip_options[] =
3360 { "all", RECIP_MASK_ALL },
3361 { "none", RECIP_MASK_NONE },
3362 { "div", RECIP_MASK_DIV },
3363 { "sqrt", RECIP_MASK_SQRT },
3364 { "vec-div", RECIP_MASK_VEC_DIV },
3365 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3368 int const pta_size = ARRAY_SIZE (processor_alias_table);
3370 /* Set up prefix/suffix so the error messages refer to either the command
3371 line argument, or the attribute(target). */
3380 prefix = "option(\"";
3385 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3386 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3387 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3388 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3389 #ifdef TARGET_BI_ARCH
3392 #if TARGET_BI_ARCH == 1
3393 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3394 is on and OPTION_MASK_ABI_X32 is off. We turn off
3395 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3397 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3398 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3400 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3401 on and OPTION_MASK_ABI_64 is off. We turn off
3402 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3403 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3404 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3405 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3406 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3411 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3413 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3414 OPTION_MASK_ABI_64 for TARGET_X32. */
3415 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3416 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3418 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3419 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3420 | OPTION_MASK_ABI_X32
3421 | OPTION_MASK_ABI_64);
3422 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3424 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3425 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3426 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3427 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3430 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3431 SUBTARGET_OVERRIDE_OPTIONS;
3434 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3435 SUBSUBTARGET_OVERRIDE_OPTIONS;
3438 /* -fPIC is the default for x86_64. */
3439 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3440 opts->x_flag_pic = 2;
3442 /* Need to check -mtune=generic first. */
3443 if (opts->x_ix86_tune_string)
3445 /* As special support for cross compilers we read -mtune=native
3446 as -mtune=generic. With native compilers we won't see the
3447 -mtune=native, as it was changed by the driver. */
3448 if (!strcmp (opts->x_ix86_tune_string, "native"))
3450 opts->x_ix86_tune_string = "generic";
3452 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3453 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3454 "%stune=k8%s or %stune=generic%s instead as appropriate",
3455 prefix, suffix, prefix, suffix, prefix, suffix);
3459 if (opts->x_ix86_arch_string)
3460 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3461 if (!opts->x_ix86_tune_string)
3463 opts->x_ix86_tune_string
3464 = processor_target_table[TARGET_CPU_DEFAULT].name;
3465 ix86_tune_defaulted = 1;
3468 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3469 or defaulted. We need to use a sensible tune option. */
3470 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3472 opts->x_ix86_tune_string = "generic";
3476 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3477 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3479 /* rep; movq isn't available in 32-bit code. */
3480 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3481 opts->x_ix86_stringop_alg = no_stringop;
3484 if (!opts->x_ix86_arch_string)
3485 opts->x_ix86_arch_string
3486 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3487 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3489 ix86_arch_specified = 1;
3491 if (opts_set->x_ix86_pmode)
3493 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3494 && opts->x_ix86_pmode == PMODE_SI)
3495 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3496 && opts->x_ix86_pmode == PMODE_DI))
3497 error ("address mode %qs not supported in the %s bit mode",
3498 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3499 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3502 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3503 ? PMODE_DI : PMODE_SI;
3505 if (!opts_set->x_ix86_abi)
3506 opts->x_ix86_abi = DEFAULT_ABI;
3508 /* For targets using ms ABI enable ms-extensions, if not
3509 explicit turned off. For non-ms ABI we turn off this
3511 if (!opts_set->x_flag_ms_extensions)
3512 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3514 if (opts_set->x_ix86_cmodel)
3516 switch (opts->x_ix86_cmodel)
3520 if (opts->x_flag_pic)
3521 opts->x_ix86_cmodel = CM_SMALL_PIC;
3522 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3523 error ("code model %qs not supported in the %s bit mode",
3529 if (opts->x_flag_pic)
3530 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3531 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3532 error ("code model %qs not supported in the %s bit mode",
3534 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3535 error ("code model %qs not supported in x32 mode",
3541 if (opts->x_flag_pic)
3542 opts->x_ix86_cmodel = CM_LARGE_PIC;
3543 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3544 error ("code model %qs not supported in the %s bit mode",
3546 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3547 error ("code model %qs not supported in x32 mode",
3552 if (opts->x_flag_pic)
3553 error ("code model %s does not support PIC mode", "32");
3554 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3555 error ("code model %qs not supported in the %s bit mode",
3560 if (opts->x_flag_pic)
3562 error ("code model %s does not support PIC mode", "kernel");
3563 opts->x_ix86_cmodel = CM_32;
3565 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3566 error ("code model %qs not supported in the %s bit mode",
3576 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3577 use of rip-relative addressing. This eliminates fixups that
3578 would otherwise be needed if this object is to be placed in a
3579 DLL, and is essentially just as efficient as direct addressing. */
3580 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3581 && (TARGET_RDOS || TARGET_PECOFF))
3582 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3583 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3584 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3586 opts->x_ix86_cmodel = CM_32;
3588 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3590 error ("-masm=intel not supported in this configuration");
3591 opts->x_ix86_asm_dialect = ASM_ATT;
3593 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3594 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3595 sorry ("%i-bit mode not compiled in",
3596 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3598 for (i = 0; i < pta_size; i++)
3599 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3601 ix86_schedule = processor_alias_table[i].schedule;
3602 ix86_arch = processor_alias_table[i].processor;
3603 /* Default cpu tuning to the architecture. */
3604 ix86_tune = ix86_arch;
3606 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3607 && !(processor_alias_table[i].flags & PTA_64BIT))
3608 error ("CPU you selected does not support x86-64 "
3611 if (processor_alias_table[i].flags & PTA_MMX
3612 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3613 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3614 if (processor_alias_table[i].flags & PTA_3DNOW
3615 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3616 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3617 if (processor_alias_table[i].flags & PTA_3DNOW_A
3618 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3619 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3620 if (processor_alias_table[i].flags & PTA_SSE
3621 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3622 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3623 if (processor_alias_table[i].flags & PTA_SSE2
3624 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3625 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3626 if (processor_alias_table[i].flags & PTA_SSE3
3627 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3628 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3629 if (processor_alias_table[i].flags & PTA_SSSE3
3630 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3631 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3632 if (processor_alias_table[i].flags & PTA_SSE4_1
3633 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3634 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3635 if (processor_alias_table[i].flags & PTA_SSE4_2
3636 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3637 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3638 if (processor_alias_table[i].flags & PTA_AVX
3639 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3640 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3641 if (processor_alias_table[i].flags & PTA_AVX2
3642 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3643 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3644 if (processor_alias_table[i].flags & PTA_FMA
3645 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3646 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3647 if (processor_alias_table[i].flags & PTA_SSE4A
3648 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3649 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3650 if (processor_alias_table[i].flags & PTA_FMA4
3651 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3652 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3653 if (processor_alias_table[i].flags & PTA_XOP
3654 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3655 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3656 if (processor_alias_table[i].flags & PTA_LWP
3657 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3658 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3659 if (processor_alias_table[i].flags & PTA_ABM
3660 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3661 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3662 if (processor_alias_table[i].flags & PTA_BMI
3663 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3664 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3665 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3666 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3667 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3668 if (processor_alias_table[i].flags & PTA_TBM
3669 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3670 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3671 if (processor_alias_table[i].flags & PTA_BMI2
3672 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3673 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3674 if (processor_alias_table[i].flags & PTA_CX16
3675 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3676 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3677 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3678 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3679 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3680 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3681 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3682 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3683 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3684 if (processor_alias_table[i].flags & PTA_MOVBE
3685 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3686 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3687 if (processor_alias_table[i].flags & PTA_AES
3688 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3689 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3690 if (processor_alias_table[i].flags & PTA_SHA
3691 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3692 ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3693 if (processor_alias_table[i].flags & PTA_PCLMUL
3694 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3695 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3696 if (processor_alias_table[i].flags & PTA_FSGSBASE
3697 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3698 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3699 if (processor_alias_table[i].flags & PTA_RDRND
3700 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3701 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3702 if (processor_alias_table[i].flags & PTA_F16C
3703 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3704 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3705 if (processor_alias_table[i].flags & PTA_RTM
3706 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3707 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3708 if (processor_alias_table[i].flags & PTA_HLE
3709 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3710 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3711 if (processor_alias_table[i].flags & PTA_PRFCHW
3712 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3713 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3714 if (processor_alias_table[i].flags & PTA_RDSEED
3715 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3716 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3717 if (processor_alias_table[i].flags & PTA_ADX
3718 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3719 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3720 if (processor_alias_table[i].flags & PTA_FXSR
3721 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3722 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3723 if (processor_alias_table[i].flags & PTA_XSAVE
3724 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3725 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3726 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3727 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3728 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3729 if (processor_alias_table[i].flags & PTA_AVX512F
3730 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3731 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3732 if (processor_alias_table[i].flags & PTA_AVX512ER
3733 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3734 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3735 if (processor_alias_table[i].flags & PTA_AVX512PF
3736 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3737 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3738 if (processor_alias_table[i].flags & PTA_AVX512CD
3739 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3740 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3741 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3742 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3743 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3744 if (processor_alias_table[i].flags & PTA_PCOMMIT
3745 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
3746 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
3747 if (processor_alias_table[i].flags & PTA_CLWB
3748 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
3749 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
3750 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3751 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3752 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3753 if (processor_alias_table[i].flags & PTA_XSAVEC
3754 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3755 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3756 if (processor_alias_table[i].flags & PTA_XSAVES
3757 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3758 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3759 if (processor_alias_table[i].flags & PTA_AVX512DQ
3760 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3761 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3762 if (processor_alias_table[i].flags & PTA_AVX512BW
3763 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3764 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3765 if (processor_alias_table[i].flags & PTA_AVX512VL
3766 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3767 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3768 if (processor_alias_table[i].flags & PTA_MPX
3769 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
3770 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
3771 if (processor_alias_table[i].flags & PTA_AVX512VBMI
3772 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
3773 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
3774 if (processor_alias_table[i].flags & PTA_AVX512IFMA
3775 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
3776 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
3777 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3778 x86_prefetch_sse = true;
3783 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
3784 error ("Intel MPX does not support x32");
3786 if (TARGET_X32 && (ix86_isa_flags & OPTION_MASK_ISA_MPX))
3787 error ("Intel MPX does not support x32");
3789 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3790 error ("generic CPU can be used only for %stune=%s %s",
3791 prefix, suffix, sw);
3792 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3793 error ("intel CPU can be used only for %stune=%s %s",
3794 prefix, suffix, sw);
3795 else if (i == pta_size)
3796 error ("bad value (%s) for %sarch=%s %s",
3797 opts->x_ix86_arch_string, prefix, suffix, sw);
3799 ix86_arch_mask = 1u << ix86_arch;
3800 for (i = 0; i < X86_ARCH_LAST; ++i)
3801 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3803 for (i = 0; i < pta_size; i++)
3804 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3806 ix86_schedule = processor_alias_table[i].schedule;
3807 ix86_tune = processor_alias_table[i].processor;
3808 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3810 if (!(processor_alias_table[i].flags & PTA_64BIT))
3812 if (ix86_tune_defaulted)
3814 opts->x_ix86_tune_string = "x86-64";
3815 for (i = 0; i < pta_size; i++)
3816 if (! strcmp (opts->x_ix86_tune_string,
3817 processor_alias_table[i].name))
3819 ix86_schedule = processor_alias_table[i].schedule;
3820 ix86_tune = processor_alias_table[i].processor;
3823 error ("CPU you selected does not support x86-64 "
3827 /* Intel CPUs have always interpreted SSE prefetch instructions as
3828 NOPs; so, we can enable SSE prefetch instructions even when
3829 -mtune (rather than -march) points us to a processor that has them.
3830 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3831 higher processors. */
3833 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3834 x86_prefetch_sse = true;
3838 if (ix86_tune_specified && i == pta_size)
3839 error ("bad value (%s) for %stune=%s %s",
3840 opts->x_ix86_tune_string, prefix, suffix, sw);
3842 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3844 #ifndef USE_IX86_FRAME_POINTER
3845 #define USE_IX86_FRAME_POINTER 0
3848 #ifndef USE_X86_64_FRAME_POINTER
3849 #define USE_X86_64_FRAME_POINTER 0
3852 /* Set the default values for switches whose default depends on TARGET_64BIT
3853 in case they weren't overwritten by command line options. */
3854 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3856 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3857 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3858 if (opts->x_flag_asynchronous_unwind_tables
3859 && !opts_set->x_flag_unwind_tables
3860 && TARGET_64BIT_MS_ABI)
3861 opts->x_flag_unwind_tables = 1;
3862 if (opts->x_flag_asynchronous_unwind_tables == 2)
3863 opts->x_flag_unwind_tables
3864 = opts->x_flag_asynchronous_unwind_tables = 1;
3865 if (opts->x_flag_pcc_struct_return == 2)
3866 opts->x_flag_pcc_struct_return = 0;
3870 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3871 opts->x_flag_omit_frame_pointer
3872 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3873 if (opts->x_flag_asynchronous_unwind_tables == 2)
3874 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3875 if (opts->x_flag_pcc_struct_return == 2)
3876 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3879 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3880 /* TODO: ix86_cost should be chosen at instruction or function granularity
3881 so for cold code we use size_cost even in !optimize_size compilation. */
3882 if (opts->x_optimize_size)
3883 ix86_cost = &ix86_size_cost;
3885 ix86_cost = ix86_tune_cost;
3887 /* Arrange to set up i386_stack_locals for all functions. */
3888 init_machine_status = ix86_init_machine_status;
3890 /* Validate -mregparm= value. */
3891 if (opts_set->x_ix86_regparm)
3893 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3894 warning (0, "-mregparm is ignored in 64-bit mode");
3895 if (opts->x_ix86_regparm > REGPARM_MAX)
3897 error ("-mregparm=%d is not between 0 and %d",
3898 opts->x_ix86_regparm, REGPARM_MAX);
3899 opts->x_ix86_regparm = 0;
3902 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3903 opts->x_ix86_regparm = REGPARM_MAX;
3905 /* Default align_* from the processor table. */
3906 if (opts->x_align_loops == 0)
3908 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3909 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3911 if (opts->x_align_jumps == 0)
3913 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3914 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3916 if (opts->x_align_functions == 0)
3918 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3921 /* Provide default for -mbranch-cost= value. */
3922 if (!opts_set->x_ix86_branch_cost)
3923 opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
3925 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3927 opts->x_target_flags
3928 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3930 /* Enable by default the SSE and MMX builtins. Do allow the user to
3931 explicitly disable any of these. In particular, disabling SSE and
3932 MMX for kernel code is extremely useful. */
3933 if (!ix86_arch_specified)
3934 opts->x_ix86_isa_flags
3935 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3936 | TARGET_SUBTARGET64_ISA_DEFAULT)
3937 & ~opts->x_ix86_isa_flags_explicit);
3939 if (TARGET_RTD_P (opts->x_target_flags))
3940 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3944 opts->x_target_flags
3945 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3947 if (!ix86_arch_specified)
3948 opts->x_ix86_isa_flags
3949 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3951 /* i386 ABI does not specify red zone. It still makes sense to use it
3952 when the programmer takes care to prevent the stack from being destroyed. */
3953 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3954 opts->x_target_flags |= MASK_NO_RED_ZONE;
3957 /* Keep nonleaf frame pointers. */
3958 if (opts->x_flag_omit_frame_pointer)
3959 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3960 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3961 opts->x_flag_omit_frame_pointer = 1;
3963 /* If we're doing fast math, we don't care about comparison order
3964 wrt NaNs. This lets us use a shorter comparison sequence. */
3965 if (opts->x_flag_finite_math_only)
3966 opts->x_target_flags &= ~MASK_IEEE_FP;
3968 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3969 since the insns won't need emulation. */
3970 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3971 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3973 /* Likewise, if the target doesn't have a 387, or we've specified
3974 software floating point, don't use 387 inline intrinsics. */
3975 if (!TARGET_80387_P (opts->x_target_flags))
3976 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3978 /* Turn on MMX builtins for -msse. */
3979 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3980 opts->x_ix86_isa_flags
3981 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3983 /* Enable SSE prefetch. */
3984 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3985 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3986 x86_prefetch_sse = true;
3988 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3989 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3990 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3991 opts->x_ix86_isa_flags
3992 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3994 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3995 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3996 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3997 opts->x_ix86_isa_flags
3998 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
4000 /* Enable lzcnt instruction for -mabm. */
4001 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
4002 opts->x_ix86_isa_flags
4003 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
4005 /* Validate -mpreferred-stack-boundary= value or default it to
4006 PREFERRED_STACK_BOUNDARY_DEFAULT. */
4007 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
4008 if (opts_set->x_ix86_preferred_stack_boundary_arg)
4010 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4011 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
4012 int max = (TARGET_SEH ? 4 : 12);
4014 if (opts->x_ix86_preferred_stack_boundary_arg < min
4015 || opts->x_ix86_preferred_stack_boundary_arg > max)
4018 error ("-mpreferred-stack-boundary is not supported "
4021 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
4022 opts->x_ix86_preferred_stack_boundary_arg, min, max);
4025 ix86_preferred_stack_boundary
4026 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
4029 /* Set the default value for -mstackrealign. */
4030 if (opts->x_ix86_force_align_arg_pointer == -1)
4031 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4033 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4035 /* Validate -mincoming-stack-boundary= value or default it to
4036 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4037 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4038 if (opts_set->x_ix86_incoming_stack_boundary_arg)
4040 if (opts->x_ix86_incoming_stack_boundary_arg
4041 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
4042 || opts->x_ix86_incoming_stack_boundary_arg > 12)
4043 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4044 opts->x_ix86_incoming_stack_boundary_arg,
4045 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
4048 ix86_user_incoming_stack_boundary
4049 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4050 ix86_incoming_stack_boundary
4051 = ix86_user_incoming_stack_boundary;
4055 #ifndef NO_PROFILE_COUNTERS
4056 if (flag_nop_mcount)
4057 error ("-mnop-mcount is not compatible with this target");
4059 if (flag_nop_mcount && flag_pic)
4060 error ("-mnop-mcount is not implemented for -fPIC");
4062 /* Accept -msseregparm only if at least SSE support is enabled. */
4063 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4064 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4065 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4067 if (opts_set->x_ix86_fpmath)
4069 if (opts->x_ix86_fpmath & FPMATH_SSE)
4071 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4073 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4074 opts->x_ix86_fpmath = FPMATH_387;
4076 else if ((opts->x_ix86_fpmath & FPMATH_387)
4077 && !TARGET_80387_P (opts->x_target_flags))
4079 warning (0, "387 instruction set disabled, using SSE arithmetics");
4080 opts->x_ix86_fpmath = FPMATH_SSE;
4084 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4085 fpmath=387. The second is however default at many targets since the
4086 extra 80bit precision of temporaries is considered to be part of ABI.
4087 Overwrite the default at least for -ffast-math.
4088 TODO: -mfpmath=both seems to produce same performing code with bit
4089 smaller binaries. It is however not clear if register allocation is
4090 ready for this setting.
4091 Also -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
4092 codegen. We may switch to 387 with -ffast-math for size optimized
4094 else if (fast_math_flags_set_p (&global_options)
4095 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4096 opts->x_ix86_fpmath = FPMATH_SSE;
4098 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4100 /* If the i387 is disabled, then do not return values in it. */
4101 if (!TARGET_80387_P (opts->x_target_flags))
4102 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4104 /* Use external vectorized library in vectorizing intrinsics. */
4105 if (opts_set->x_ix86_veclibabi_type)
4106 switch (opts->x_ix86_veclibabi_type)
4108 case ix86_veclibabi_type_svml:
4109 ix86_veclib_handler = ix86_veclibabi_svml;
4112 case ix86_veclibabi_type_acml:
4113 ix86_veclib_handler = ix86_veclibabi_acml;
4120 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4121 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4122 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4124 /* If stack probes are required, the space used for large function
4125 arguments on the stack must also be probed, so enable
4126 -maccumulate-outgoing-args so this happens in the prologue. */
4127 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4128 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4130 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4131 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4132 "for correctness", prefix, suffix);
4133 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4136 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4139 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4140 p = strchr (internal_label_prefix, 'X');
4141 internal_label_prefix_len = p - internal_label_prefix;
4145 /* When scheduling description is not available, disable scheduler pass
4146 so it won't slow down the compilation and make x87 code slower. */
4147 if (!TARGET_SCHEDULE)
4148 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4150 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4151 ix86_tune_cost->simultaneous_prefetches,
4152 opts->x_param_values,
4153 opts_set->x_param_values);
4154 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4155 ix86_tune_cost->prefetch_block,
4156 opts->x_param_values,
4157 opts_set->x_param_values);
4158 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4159 ix86_tune_cost->l1_cache_size,
4160 opts->x_param_values,
4161 opts_set->x_param_values);
4162 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4163 ix86_tune_cost->l2_cache_size,
4164 opts->x_param_values,
4165 opts_set->x_param_values);
4167 /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */
4168 if (opts->x_flag_prefetch_loop_arrays < 0
4170 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4171 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4172 opts->x_flag_prefetch_loop_arrays = 1;
4174 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4175 can be optimized to ap = __builtin_next_arg (0). */
4176 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4177 targetm.expand_builtin_va_start = NULL;
4179 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4181 ix86_gen_leave = gen_leave_rex64;
4182 if (Pmode == DImode)
4184 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4185 ix86_gen_tls_local_dynamic_base_64
4186 = gen_tls_local_dynamic_base_64_di;
4190 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4191 ix86_gen_tls_local_dynamic_base_64
4192 = gen_tls_local_dynamic_base_64_si;
4196 ix86_gen_leave = gen_leave;
4198 if (Pmode == DImode)
4200 ix86_gen_add3 = gen_adddi3;
4201 ix86_gen_sub3 = gen_subdi3;
4202 ix86_gen_sub3_carry = gen_subdi3_carry;
4203 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4204 ix86_gen_andsp = gen_anddi3;
4205 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4206 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4207 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4208 ix86_gen_monitor = gen_sse3_monitor_di;
4212 ix86_gen_add3 = gen_addsi3;
4213 ix86_gen_sub3 = gen_subsi3;
4214 ix86_gen_sub3_carry = gen_subsi3_carry;
4215 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4216 ix86_gen_andsp = gen_andsi3;
4217 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4218 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4219 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4220 ix86_gen_monitor = gen_sse3_monitor_si;
4224 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4225 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4226 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4229 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4231 if (opts->x_flag_fentry > 0)
4232 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4234 opts->x_flag_fentry = 0;
4236 else if (TARGET_SEH)
4238 if (opts->x_flag_fentry == 0)
4239 sorry ("-mno-fentry isn%'t compatible with SEH");
4240 opts->x_flag_fentry = 1;
4242 else if (opts->x_flag_fentry < 0)
4244 #if defined(PROFILE_BEFORE_PROLOGUE)
4245 opts->x_flag_fentry = 1;
4247 opts->x_flag_fentry = 0;
4251 if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
4252 opts->x_target_flags |= MASK_VZEROUPPER;
4253 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4254 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4255 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4256 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4257 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4258 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4259 /* Enable 128-bit AVX instruction generation
4260 for the auto-vectorizer. */
4261 if (TARGET_AVX128_OPTIMAL
4262 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4263 opts->x_target_flags |= MASK_PREFER_AVX128;
4265 if (opts->x_ix86_recip_name)
4267 char *p = ASTRDUP (opts->x_ix86_recip_name);
4269 unsigned int mask, i;
4272 while ((q = strtok (p, ",")) != NULL)
4283 if (!strcmp (q, "default"))
4284 mask = RECIP_MASK_ALL;
4287 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4288 if (!strcmp (q, recip_options[i].string))
4290 mask = recip_options[i].mask;
4294 if (i == ARRAY_SIZE (recip_options))
4296 error ("unknown option for -mrecip=%s", q);
4298 mask = RECIP_MASK_NONE;
4302 opts->x_recip_mask_explicit |= mask;
4304 opts->x_recip_mask &= ~mask;
4306 opts->x_recip_mask |= mask;
4310 if (TARGET_RECIP_P (opts->x_target_flags))
4311 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4312 else if (opts_set->x_target_flags & MASK_RECIP)
4313 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4315 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4316 for 64-bit Bionic. */
4317 if (TARGET_HAS_BIONIC
4318 && !(opts_set->x_target_flags
4319 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4320 opts->x_target_flags |= (TARGET_64BIT
4321 ? MASK_LONG_DOUBLE_128
4322 : MASK_LONG_DOUBLE_64);
4324 /* Only one of them can be active. */
4325 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4326 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4328 /* Save the initial options in case the user does function specific
4331 target_option_default_node = target_option_current_node
4332 = build_target_option_node (opts);
4334 /* Handle stack protector */
4335 if (!opts_set->x_ix86_stack_protector_guard)
4336 opts->x_ix86_stack_protector_guard
4337 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4339 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4340 if (opts->x_ix86_tune_memcpy_strategy)
4342 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4343 ix86_parse_stringop_strategy_string (str, false);
4347 if (opts->x_ix86_tune_memset_strategy)
4349 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4350 ix86_parse_stringop_strategy_string (str, true);
4355 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4358 ix86_option_override (void)
/* Describe the vzeroupper-insertion pass so it is scheduled to run
   immediately after the first instance of the "reload" pass. */
4360 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4361 struct register_pass_info insert_vzeroupper_info
4362 = { pass_insert_vzeroupper, "reload",
4363 1, PASS_POS_INSERT_AFTER
/* Do the real option processing against the global option state;
   the 'true' argument marks this as the main (non-attribute) call. */
4366 ix86_option_override_internal (true, &global_options, &global_options_set);
4369 /* This needs to be done at start up. It's convenient to do it here. */
4370 register_pass (&insert_vzeroupper_info);
4373 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
/* Returns a freshly xstrdup'd option string describing the ABI the
   host is compiling for, to be passed to offload compilers.  Caller
   owns (and must free) the returned string.  */
4375 ix86_offload_options (void)
/* NOTE(review): the condition selecting the lp64 branch is not visible
   in this excerpt -- presumably a 64-bit/LP64 target check; confirm
   against the full source before relying on it.  */
4378 return xstrdup ("-foffload-abi=lp64");
4379 return xstrdup ("-foffload-abi=ilp32");
4382 /* Update register usage after having seen the compiler flags. */
/* Marks registers that the selected target/ISA flags make unavailable
   as fixed and call-used, and erases their names so they never appear
   in diagnostics or debug output.  Also rebuilds CLOBBERED_REGS from
   the call-used subset of GENERAL_REGS.  */
4385 ix86_conditional_register_usage (void)
4389 /* For 32-bit targets, squash the REX registers. */
/* NOTE(review): the enclosing !TARGET_64BIT guard for these three
   loops is not visible in this excerpt -- confirm in full source.  */
4392 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4393 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4394 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4395 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4396 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4397 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4400 /* See the definition of CALL_USED_REGISTERS in i386.h. */
/* c_mask selects which conditional bit in the CALL_USED_REGISTERS
   initializer applies to the current ABI (MS 64-bit, SysV 64-bit,
   or 32-bit -- the final arm of the conditional is elided here).  */
4401 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4402 : TARGET_64BIT ? (1 << 2)
4405 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4407 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4409 /* Set/reset conditionally defined registers from
4410 CALL_USED_REGISTERS initializer. */
/* Values > 1 are bitmasks of ABI-conditional entries; collapse them
   to 0/1 according to the ABI mask computed above.  */
4411 if (call_used_regs[i] > 1)
4412 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4414 /* Calculate registers of CLOBBERED_REGS register set
4415 as call used registers from GENERAL_REGS register set. */
4416 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4417 && call_used_regs[i])
4418 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4421 /* If MMX is disabled, squash the registers. */
/* NOTE(review): the !TARGET_MMX guard is elided from this view.  */
4423 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4424 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4425 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4427 /* If SSE is disabled, squash the registers. */
/* NOTE(review): the !TARGET_SSE guard is elided from this view.  */
4429 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4430 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4431 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4433 /* If the FPU is disabled, squash the registers. */
4434 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4435 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4436 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4437 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4439 /* If AVX512F is disabled, squash the registers. */
/* Without AVX-512, both the upper 16 SSE registers (xmm16-xmm31)
   and the mask registers (k0-k7) are unusable.  */
4440 if (! TARGET_AVX512F)
4442 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4443 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4445 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4446 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4449 /* If MPX is disabled, squash the registers. */
/* NOTE(review): the !TARGET_MPX guard is elided from this view.  */
4451 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
4452 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
/* TARGET_OPTION_SAVE hook: copy the current i386-specific option state
   (both the ix86_* backend globals and the saved fields of OPTS) into
   the cl_target_option structure PTR, so it can later be restored by
   ix86_function_specific_restore.  The trailing asserts verify that
   the narrow (char-sized) fields did not truncate the values.  */
4456 /* Save the current options */
4459 ix86_function_specific_save (struct cl_target_option *ptr,
4460 			     struct gcc_options *opts)
4462   ptr->arch = ix86_arch;
4463   ptr->schedule = ix86_schedule;
4464   ptr->prefetch_sse = x86_prefetch_sse;
4465   ptr->tune = ix86_tune;
4466   ptr->branch_cost = ix86_branch_cost;
4467   ptr->tune_defaulted = ix86_tune_defaulted;
4468   ptr->arch_specified = ix86_arch_specified;
4469   ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4470   ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4471   ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4472   ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4473   ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4474   ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4475   ptr->x_ix86_abi = opts->x_ix86_abi;
4476   ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4477   ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4478   ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4479   ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4480   ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4481   ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4482   ptr->x_ix86_pmode = opts->x_ix86_pmode;
4483   ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4484   ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4485   ptr->x_ix86_regparm = opts->x_ix86_regparm;
4486   ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4487   ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4488   ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4489   ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4490   ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4491   ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4492   ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4493   ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4494   ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4495   ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4497   /* The fields are char but the variables are not; make sure the
4498      values fit in the fields.  */
4499   gcc_assert (ptr->arch == ix86_arch);
4500   gcc_assert (ptr->schedule == ix86_schedule);
4501   gcc_assert (ptr->tune == ix86_tune);
4502   gcc_assert (ptr->branch_cost == ix86_branch_cost);
/* TARGET_OPTION_RESTORE hook: the inverse of ix86_function_specific_save.
   Reinstate the backend globals and OPTS fields from PTR, reselect the
   cost tables, and — only when arch or tune actually changed — rebuild
   the derived arch-feature and tune-feature tables.  flag_pic is global
   state and is deliberately left alone.
   NOTE(review): the declaration of loop variable `i' and the `else'
   before the ix86_cost = ix86_tune_cost assignment are elided in this
   listing.  */
4505 /* Restore the current options */
4508 ix86_function_specific_restore (struct gcc_options *opts,
4509 				struct cl_target_option *ptr)
4511   enum processor_type old_tune = ix86_tune;
4512   enum processor_type old_arch = ix86_arch;
4513   unsigned int ix86_arch_mask;
4516   /* We don't change -fPIC.  */
4517   opts->x_flag_pic = flag_pic;
4519   ix86_arch = (enum processor_type) ptr->arch;
4520   ix86_schedule = (enum attr_cpu) ptr->schedule;
4521   ix86_tune = (enum processor_type) ptr->tune;
4522   x86_prefetch_sse = ptr->prefetch_sse;
4523   opts->x_ix86_branch_cost = ptr->branch_cost;
4524   ix86_tune_defaulted = ptr->tune_defaulted;
4525   ix86_arch_specified = ptr->arch_specified;
4526   opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4527   opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4528   opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4529   opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4530   opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4531   opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4532   opts->x_ix86_abi = ptr->x_ix86_abi;
4533   opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4534   opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4535   opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4536   opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4537   opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4538   opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4539   opts->x_ix86_pmode = ptr->x_ix86_pmode;
4540   opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4541   opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4542   opts->x_ix86_regparm = ptr->x_ix86_regparm;
4543   opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4544   opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4545   opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4546   opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4547   opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4548   opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4549   opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4550   opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4551   opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4552   opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4553   ix86_tune_cost = processor_target_table[ix86_tune].cost;
4554   /* TODO: ix86_cost should be chosen at instruction or function granuality
4555      so for cold code we use size_cost even in !optimize_size compilation.  */
4556   if (opts->x_optimize_size)
4557     ix86_cost = &ix86_size_cost;
4559     ix86_cost = ix86_tune_cost;
4561   /* Recreate the arch feature tests if the arch changed */
4562   if (old_arch != ix86_arch)
4564       ix86_arch_mask = 1u << ix86_arch;
4565       for (i = 0; i < X86_ARCH_LAST; ++i)
4566 	ix86_arch_features[i]
4567 	  = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4570   /* Recreate the tune optimization tests */
4571   if (old_tune != ix86_tune)
4572     set_ix86_tune_features (ix86_tune, false);
/* TARGET_OPTION_POST_STREAM_IN hook (LTO): reconcile the streamed-in
   target options in PTR with the current global options.  The first
   switch promotes non-PIC code models to their PIC equivalents (or
   errors out for the kernel model); the second does the reverse
   demotion.  NOTE(review): the `if (flag_pic)' / `else' guards, the
   `case CM_*:' labels and the `break;' statements of both switches are
   elided in this listing — the pairing below is inferred from the
   assignments only; confirm against the full file.  */
4575 /* Adjust target options after streaming them in.  This is mainly about
4576    reconciling them with global options.  */
4579 ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
4581   /* flag_pic is a global option, but ix86_cmodel is target saved option
4582      partly computed from flag_pic.  If flag_pic is on, adjust x_ix86_cmodel
4583      for PIC, or error out.  */
4585     switch (ptr->x_ix86_cmodel)
4588 	  ptr->x_ix86_cmodel = CM_SMALL_PIC;
4592 	  ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
4596 	  ptr->x_ix86_cmodel = CM_LARGE_PIC;
4600 	  error ("code model %s does not support PIC mode", "kernel");
4607     switch (ptr->x_ix86_cmodel)
4610 	  ptr->x_ix86_cmodel = CM_SMALL;
4614 	  ptr->x_ix86_cmodel = CM_MEDIUM;
4618 	  ptr->x_ix86_cmodel = CM_LARGE;
/* TARGET_OPTION_PRINT hook: dump the saved target options PTR to FILE,
   indented by INDENT columns — the arch and tune (as index plus table
   name), the branch cost, and the full option string rendered by
   ix86_target_string (whose result is heap-allocated and freed here).
   NOTE(review): the declaration of `target_string' and the `%*s'
   indent arguments on some fprintf calls are elided in this listing.  */
4626 /* Print the current options */
4629 ix86_function_specific_print (FILE *file, int indent,
4630 			      struct cl_target_option *ptr)
4633     = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4634 			  NULL, NULL, ptr->x_ix86_fpmath, false);
4636   gcc_assert (ptr->arch < PROCESSOR_max);
4637   fprintf (file, "%*sarch = %d (%s)\n",
4639 	   ptr->arch, processor_target_table[ptr->arch].name);
4641   gcc_assert (ptr->tune < PROCESSOR_max);
4642   fprintf (file, "%*stune = %d (%s)\n",
4644 	   ptr->tune, processor_target_table[ptr->tune].name);
4646   fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4650     fprintf (file, "%*s%s\n", indent, "", target_string);
4651   free (target_string);
/* Parse one argument of attribute((target(...))).  ARGS is either a
   TREE_LIST (handled by recursion) or a STRING_CST holding a
   comma-separated option list.  Each option is matched against the
   `attrs' table below and applied to OPTS/OPTS_SET/ENUM_OPTS_SET;
   string-valued options (arch=, tune=) are stashed in P_STRINGS for
   the caller.  Returns false (after emitting an error) on any
   unrecognized or malformed option.
   NOTE(review): the ix86_opt_type enum, the attrs[] table declaration,
   several locals (opt, orig_p, ch, mask, opt_set_p, ret, i), most
   braces, and various guard/else lines are elided in this listing.  */
4656 /* Inner function to process the attribute((target(...))), take an argument and
4657    set the current options from the argument.  If we have a list, recursively go
4661 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4662 				     struct gcc_options *opts,
4663 				     struct gcc_options *opts_set,
4664 				     struct gcc_options *enum_opts_set)
     /* Table-entry constructors: name string, its length, the option
	kind, the OPT_* enumerator, and (for yes/no options) the
	target_flags mask.  */
4669 #define IX86_ATTR_ISA(S,O)   { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4670 #define IX86_ATTR_STR(S,O)   { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4671 #define IX86_ATTR_ENUM(S,O)  { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4672 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4673 #define IX86_ATTR_NO(S,O,M)  { S, sizeof (S)-1, ix86_opt_no, O, M }
4689     enum ix86_opt_type type;
     /* ISA options: each maps attribute text to the equivalent -m flag.  */
4694     IX86_ATTR_ISA ("3dnow",	OPT_m3dnow),
4695     IX86_ATTR_ISA ("abm",	OPT_mabm),
4696     IX86_ATTR_ISA ("bmi",	OPT_mbmi),
4697     IX86_ATTR_ISA ("bmi2",	OPT_mbmi2),
4698     IX86_ATTR_ISA ("lzcnt",	OPT_mlzcnt),
4699     IX86_ATTR_ISA ("tbm",	OPT_mtbm),
4700     IX86_ATTR_ISA ("aes",	OPT_maes),
4701     IX86_ATTR_ISA ("sha",	OPT_msha),
4702     IX86_ATTR_ISA ("avx",	OPT_mavx),
4703     IX86_ATTR_ISA ("avx2",	OPT_mavx2),
4704     IX86_ATTR_ISA ("avx512f",	OPT_mavx512f),
4705     IX86_ATTR_ISA ("avx512pf",	OPT_mavx512pf),
4706     IX86_ATTR_ISA ("avx512er",	OPT_mavx512er),
4707     IX86_ATTR_ISA ("avx512cd",	OPT_mavx512cd),
4708     IX86_ATTR_ISA ("avx512dq",	OPT_mavx512dq),
4709     IX86_ATTR_ISA ("avx512bw",	OPT_mavx512bw),
4710     IX86_ATTR_ISA ("avx512vl",	OPT_mavx512vl),
4711     IX86_ATTR_ISA ("mmx",	OPT_mmmx),
4712     IX86_ATTR_ISA ("pclmul",	OPT_mpclmul),
4713     IX86_ATTR_ISA ("popcnt",	OPT_mpopcnt),
4714     IX86_ATTR_ISA ("sse",	OPT_msse),
4715     IX86_ATTR_ISA ("sse2",	OPT_msse2),
4716     IX86_ATTR_ISA ("sse3",	OPT_msse3),
4717     IX86_ATTR_ISA ("sse4",	OPT_msse4),
4718     IX86_ATTR_ISA ("sse4.1",	OPT_msse4_1),
4719     IX86_ATTR_ISA ("sse4.2",	OPT_msse4_2),
4720     IX86_ATTR_ISA ("sse4a",	OPT_msse4a),
4721     IX86_ATTR_ISA ("ssse3",	OPT_mssse3),
4722     IX86_ATTR_ISA ("fma4",	OPT_mfma4),
4723     IX86_ATTR_ISA ("fma",	OPT_mfma),
4724     IX86_ATTR_ISA ("xop",	OPT_mxop),
4725     IX86_ATTR_ISA ("lwp",	OPT_mlwp),
4726     IX86_ATTR_ISA ("fsgsbase",	OPT_mfsgsbase),
4727     IX86_ATTR_ISA ("rdrnd",	OPT_mrdrnd),
4728     IX86_ATTR_ISA ("f16c",	OPT_mf16c),
4729     IX86_ATTR_ISA ("rtm",	OPT_mrtm),
4730     IX86_ATTR_ISA ("hle",	OPT_mhle),
4731     IX86_ATTR_ISA ("prfchw",	OPT_mprfchw),
4732     IX86_ATTR_ISA ("rdseed",	OPT_mrdseed),
4733     IX86_ATTR_ISA ("adx",	OPT_madx),
4734     IX86_ATTR_ISA ("fxsr",	OPT_mfxsr),
4735     IX86_ATTR_ISA ("xsave",	OPT_mxsave),
4736     IX86_ATTR_ISA ("xsaveopt",	OPT_mxsaveopt),
4737     IX86_ATTR_ISA ("prefetchwt1",	OPT_mprefetchwt1),
4738     IX86_ATTR_ISA ("clflushopt",	OPT_mclflushopt),
4739     IX86_ATTR_ISA ("xsavec",	OPT_mxsavec),
4740     IX86_ATTR_ISA ("xsaves",	OPT_mxsaves),
4741     IX86_ATTR_ISA ("avx512vbmi",	OPT_mavx512vbmi),
4742     IX86_ATTR_ISA ("avx512ifma",	OPT_mavx512ifma),
4743     IX86_ATTR_ISA ("clwb",	OPT_mclwb),
4744     IX86_ATTR_ISA ("pcommit",	OPT_mpcommit),
     /* enum option: value validated via opt_enum_arg_to_value below.  */
4747     IX86_ATTR_ENUM ("fpmath=",	OPT_mfpmath_),
4749     /* string options */
4750     IX86_ATTR_STR ("arch=",	IX86_FUNCTION_SPECIFIC_ARCH),
4751     IX86_ATTR_STR ("tune=",	IX86_FUNCTION_SPECIFIC_TUNE),
     /* flag options: set or clear a bit in target_flags.  */
4754     IX86_ATTR_YES ("cld",
4758     IX86_ATTR_NO ("fancy-math-387",
4759 		  OPT_mfancy_math_387,
4760 		  MASK_NO_FANCY_MATH_387),
4762     IX86_ATTR_YES ("ieee-fp",
4766     IX86_ATTR_YES ("inline-all-stringops",
4767 		   OPT_minline_all_stringops,
4768 		   MASK_INLINE_ALL_STRINGOPS),
4770     IX86_ATTR_YES ("inline-stringops-dynamically",
4771 		   OPT_minline_stringops_dynamically,
4772 		   MASK_INLINE_STRINGOPS_DYNAMICALLY),
4774     IX86_ATTR_NO ("align-stringops",
4775 		  OPT_mno_align_stringops,
4776 		  MASK_NO_ALIGN_STRINGOPS),
4778     IX86_ATTR_YES ("recip",
4784   /* If this is a list, recurse to get the options.  */
4785   if (TREE_CODE (args) == TREE_LIST)
4789       for (; args; args = TREE_CHAIN (args))
4790 	if (TREE_VALUE (args)
4791 	    && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4792 						     p_strings, opts, opts_set,
4799   else if (TREE_CODE (args) != STRING_CST)
4801       error ("attribute %<target%> argument not a string");
4805   /* Handle multiple arguments separated by commas.  */
     /* ASTRDUP puts the working copy on the obstack, so the STRING_CST
	itself is never modified.  */
4806   next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4808   while (next_optstr && *next_optstr != '\0')
4810       char *p = next_optstr;
4812       char *comma = strchr (next_optstr, ',');
4813       const char *opt_string;
4814       size_t len, opt_len;
4819       enum ix86_opt_type type = ix86_opt_unknown;
     /* A comma was found: this token ends there and scanning resumes
	just past it on the next iteration.  */
4825 	  len = comma - next_optstr;
4826 	  next_optstr = comma + 1;
4834       /* Recognize no-xxx.  */
4835       if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4844       /* Find the option.  */
4847       for (i = 0; i < ARRAY_SIZE (attrs); i++)
4849 	  type = attrs[i].type;
4850 	  opt_len = attrs[i].len;
     /* Fast reject on the first character before the memcmp; string and
	enum options additionally allow trailing text after the '='.  */
4851 	  if (ch == attrs[i].string[0]
4852 	      && ((type != ix86_opt_str && type != ix86_opt_enum)
4855 	      && memcmp (p, attrs[i].string, opt_len) == 0)
4858 	      mask = attrs[i].mask;
4859 	      opt_string = attrs[i].string;
4864       /* Process the option.  */
4867 	  error ("attribute(target(\"%s\")) is unknown", orig_p);
4871       else if (type == ix86_opt_isa)
4873 	  struct cl_decoded_option decoded;
     /* Route ISA options through the regular option machinery so all
	implied-ISA bookkeeping happens exactly as for -mXXX.  */
4875 	  generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4876 	  ix86_handle_option (opts, opts_set,
4877 			      &decoded, input_location);
4880       else if (type == ix86_opt_yes || type == ix86_opt_no)
4882 	  if (type == ix86_opt_no)
4883 	    opt_set_p = !opt_set_p;
4886 	    opts->x_target_flags |= mask;
4888 	    opts->x_target_flags &= ~mask;
4891       else if (type == ix86_opt_str)
4895 	      error ("option(\"%s\") was already specified", opt_string);
     /* Ownership of this xstrdup'd string passes to the caller via
	P_STRINGS (freed in ix86_valid_target_attribute_tree).  */
4899 	      p_strings[opt] = xstrdup (p + opt_len);
4902       else if (type == ix86_opt_enum)
4907 	  arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4909 	    set_option (opts, enum_opts_set, opt, value,
4910 			p + opt_len, DK_UNSPECIFIED, input_location,
4914 	      error ("attribute(target(\"%s\")) is unknown", orig_p);
/* Build a TARGET_OPTION_NODE for attribute((target(...))) ARGS, or
   return error_mark_node on a parse failure.  Parses ARGS into OPTS
   via ix86_valid_target_attribute_inner_p; if anything differs from
   the defaults, reruns ix86_option_override_internal and registers any
   newly enabled builtins before saving the node.  The original
   arch/tune strings and the fpmath-set flag are restored on exit, and
   the temporary option strings are freed.
   NOTE(review): the declarations of `t' and `i' and several braces are
   elided in this listing.  */
4926 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */
4929 ix86_valid_target_attribute_tree (tree args,
4930 				  struct gcc_options *opts,
4931 				  struct gcc_options *opts_set)
4933   const char *orig_arch_string = opts->x_ix86_arch_string;
4934   const char *orig_tune_string = opts->x_ix86_tune_string;
4935   enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4936   int orig_tune_defaulted = ix86_tune_defaulted;
4937   int orig_arch_specified = ix86_arch_specified;
4938   char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4941   struct cl_target_option *def
4942     = TREE_TARGET_OPTION (target_option_default_node);
4943   struct gcc_options enum_opts_set;
4945   memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4947   /* Process each of the options on the chain.  */
4948   if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4949 					     opts_set, &enum_opts_set))
4950     return error_mark_node;
4952   /* If the changed options are different from the default, rerun
4953      ix86_option_override_internal, and then save the options away.
4954      The string options are are attribute options, and will be undone
4955      when we copy the save structure.  */
4956   if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4957       || opts->x_target_flags != def->x_target_flags
4958       || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4959       || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4960       || enum_opts_set.x_ix86_fpmath)
4962       /* If we are using the default tune= or arch=, undo the string assigned,
4963 	 and use the default.  */
4964       if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4965 	opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4966       else if (!orig_arch_specified)
4967 	opts->x_ix86_arch_string = NULL;
4969       if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4970 	opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4971       else if (orig_tune_defaulted)
4972 	opts->x_ix86_tune_string = NULL;
4974       /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
4975       if (enum_opts_set.x_ix86_fpmath)
4976 	opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4977       else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4978 	       && TARGET_SSE_P (opts->x_ix86_isa_flags))
4980 	  opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4981 	  opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4984       /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
4985       ix86_option_override_internal (false, opts, opts_set);
4987       /* Add any builtin functions with the new isa if any.  */
4988       ix86_add_new_builtins (opts->x_ix86_isa_flags);
4990       /* Save the current options unless we are validating options for
4992       t = build_target_option_node (opts);
4994       opts->x_ix86_arch_string = orig_arch_string;
4995       opts->x_ix86_tune_string = orig_tune_string;
4996       opts_set->x_ix86_fpmath = orig_fpmath_set;
4998       /* Free up memory allocated to hold the strings */
4999       for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
5000 	free (option_strings[i]);
/* TARGET_OPTION_VALID_ATTRIBUTE_P hook for attribute((target("..."))).
   Builds a scratch gcc_options initialized from the default target
   node plus the function's own optimization node, runs the attribute
   through ix86_valid_target_attribute_tree, and on success attaches
   the resulting target (and, if changed, optimization) node to FNDECL.
   A lone "default" argument is accepted as-is — it only matters for
   function multi-versioning.
   NOTE(review): the declaration of `ret', several return statements
   and braces are elided in this listing.  */
5006 /* Hook to validate attribute((target("string"))).  */
5009 ix86_valid_target_attribute_p (tree fndecl,
5010 			       tree ARG_UNUSED (name),
5012 			       int ARG_UNUSED (flags))
5014   struct gcc_options func_options;
5015   tree new_target, new_optimize;
5018   /* attribute((target("default"))) does nothing, beyond
5019      affecting multi-versioning.  */
5020   if (TREE_VALUE (args)
5021       && TREE_CODE (TREE_VALUE (args)) == STRING_CST
5022       && TREE_CHAIN (args) == NULL_TREE
5023       && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
5026   tree old_optimize = build_optimization_node (&global_options);
5028   /* Get the optimization options of the current function.  */
5029   tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
     /* No per-function optimization node: fall back to the global one.  */
5032     func_optimize = old_optimize;
5034   /* Init func_options.  */
5035   memset (&func_options, 0, sizeof (func_options));
5036   init_options_struct (&func_options, NULL);
5037   lang_hooks.init_options_struct (&func_options);
5039   cl_optimization_restore (&func_options,
5040 			   TREE_OPTIMIZATION (func_optimize));
5042   /* Initialize func_options to the default before its target options can
5044   cl_target_option_restore (&func_options,
5045 			    TREE_TARGET_OPTION (target_option_default_node));
5047   new_target = ix86_valid_target_attribute_tree (args, &func_options,
5048 						 &global_options_set);
5050   new_optimize = build_optimization_node (&func_options);
5052   if (new_target == error_mark_node)
5055   else if (fndecl && new_target)
5057       DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
5059       if (old_optimize != new_optimize)
5060 	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
/* TARGET_CAN_INLINE_P hook: decide whether CALLEE may be inlined into
   CALLER given their per-function target options.  Inlining is allowed
   when the callee's ISA flags are a subset of the caller's and the
   non-ISA target flags, arch, tune, fpmath and branch cost all match.
   NOTE(review): the declaration of `ret', the `ret = false/true'
   assignments inside the comparison chain, and the final return are
   elided in this listing.  */
5067 /* Hook to determine if one function can safely inline another.  */
5070 ix86_can_inline_p (tree caller, tree callee)
5073   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5074   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
5076   /* If callee has no option attributes, then it is ok to inline.  */
5080   /* If caller has no option attributes, but callee does then it is not ok to
5082   else if (!caller_tree)
5087       struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5088       struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5090       /* Callee's isa options should a subset of the caller's, i.e. a SSE4 function
5091 	 can inline a SSE2 function but a SSE2 function can't inline a SSE4
5093       if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5094 	  != callee_opts->x_ix86_isa_flags)
5097       /* See if we have the same non-isa options.  */
5098       else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
5101       /* See if arch, tune, etc. are the same.  */
5102       else if (caller_opts->arch != callee_opts->arch)
5105       else if (caller_opts->tune != callee_opts->tune)
5108       else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
5111       else if (caller_opts->branch_cost != callee_opts->branch_cost)
5122 /* Remember the last target of ix86_set_current_function.  */
/* GTY-rooted so the cached decl survives garbage collection.  */
5123 static GTY(()) tree ix86_previous_fndecl;
5125 /* Set targets globals to the default (or current #pragma GCC target
5126    if active).  Invalidate ix86_previous_fndecl cache.  */
5129 ix86_reset_previous_fndecl (void)
5131   tree new_tree = target_option_current_node;
/* Reinstate the option state saved in NEW_TREE, then swap in (or
   lazily create) the matching target_globals so later queries see a
   consistent view.  Mirrors the restore path in
   ix86_set_current_function.  */
5132   cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5133   if (TREE_TARGET_GLOBALS (new_tree))
5134     restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5135   else if (new_tree == target_option_default_node)
5136     restore_target_globals (&default_target_globals);
     /* First time this non-default node is activated: build and cache
	its target_globals.  (The `else' introducing this arm is elided
	in this listing.)  */
5138     TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
/* Drop the cache so the next ix86_set_current_function call reloads.  */
5139   ix86_previous_fndecl = NULL_TREE;
/* TARGET_SET_CURRENT_FUNCTION hook: switch the backend's global option
   state to FNDECL's per-function target options (or to the defaults
   when FNDECL has none / is NULL).  Cached via ix86_previous_fndecl so
   repeated calls for the same function are cheap.
   NOTE(review): the declaration of `old_tree', an early return after
   the cache hit, and several braces are elided in this listing.  */
5142 /* Establish appropriate back-end context for processing the function
5143    FNDECL.  The argument might be NULL to indicate processing at top
5144    level, outside of any function scope.  */
5146 ix86_set_current_function (tree fndecl)
5148   /* Only change the context if the function changes.  This hook is called
5149      several times in the course of compiling a function, and we don't want to
5150      slow things down too much or call target_reinit when it isn't safe.  */
5151   if (fndecl == ix86_previous_fndecl)
     /* Determine which option node is currently in force.  */
5155   if (ix86_previous_fndecl == NULL_TREE)
5156     old_tree = target_option_current_node;
5157   else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
5158     old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
5160     old_tree = target_option_default_node;
5162   if (fndecl == NULL_TREE)
     /* Leaving function scope: fall back to the #pragma/default state.  */
5164       if (old_tree != target_option_current_node)
5165 	ix86_reset_previous_fndecl ();
5169   tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
5170   if (new_tree == NULL_TREE)
5171     new_tree = target_option_default_node;
5173   if (old_tree != new_tree)
     /* Same restore + target_globals dance as ix86_reset_previous_fndecl.  */
5175       cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5176       if (TREE_TARGET_GLOBALS (new_tree))
5177 	restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5178       else if (new_tree == target_option_default_node)
5179 	restore_target_globals (&default_target_globals);
5181 	TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5183   ix86_previous_fndecl = fndecl;
/* Return true if EXP should be placed in the large data/bss sections
   (.ldata/.lbss).  Only relevant for the medium code model: functions
   and automatic variables never qualify; variables with an explicit
   .ldata/.lbss section always do; otherwise the decision is by size
   against -mlarge-data-threshold.
   NOTE(review): the `return false/true' statements of the early-out
   branches are elided in this listing.  */
5187 /* Return true if this goes in large data/bss.  */
5190 ix86_in_large_data_p (tree exp)
5192   if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5195   /* Functions are never large data.  */
5196   if (TREE_CODE (exp) == FUNCTION_DECL)
5199   /* Automatic variables are never large data.  */
5200   if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
5203   if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5205       const char *section = DECL_SECTION_NAME (exp);
5206       if (strcmp (section, ".ldata") == 0
5207 	  || strcmp (section, ".lbss") == 0)
5213       HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5215       /* If this is an incomplete type with size 0, then we can't put it
5216 	 in data because it might be too big when completed.  Also,
5217 	 int_size_in_bytes returns -1 if size can vary or is larger than
5218 	 an integer in which case also it is safer to assume that it goes in
5220       if (size <= 0 || size > ix86_section_threshold)
/* TARGET_ASM_SELECT_SECTION for x86-64 ELF.  For medium-model large
   data, pick the matching ".l"-prefixed section (.ldata, .lbss,
   .lrodata and their .rel variants) based on the decl's category;
   everything else falls through to default_elf_select_section.
   NOTE(review): several `case SECCAT_*:' labels, `break;' statements,
   sname assignments (e.g. for BSS/RODATA) and the `if (!DECL_P (decl))'
   guard before get_section are elided in this listing.  */
5227 /* Switch to the appropriate section for output of DECL.
5228    DECL is either a `VAR_DECL' node or a constant of some sort.
5229    RELOC indicates whether forming the initial value of DECL requires
5230    link-time relocations.  */
5232 ATTRIBUTE_UNUSED static section *
5233 x86_64_elf_select_section (tree decl, int reloc,
5234 			   unsigned HOST_WIDE_INT align)
5236   if (ix86_in_large_data_p (decl))
5238       const char *sname = NULL;
5239       unsigned int flags = SECTION_WRITE;
5240       switch (categorize_decl_for_section (decl, reloc))
5245 	case SECCAT_DATA_REL:
5246 	  sname = ".ldata.rel";
5248 	case SECCAT_DATA_REL_LOCAL:
5249 	  sname = ".ldata.rel.local";
5251 	case SECCAT_DATA_REL_RO:
5252 	  sname = ".ldata.rel.ro";
5254 	case SECCAT_DATA_REL_RO_LOCAL:
5255 	  sname = ".ldata.rel.ro.local";
     /* BSS-category decls additionally mark the section as nobits.  */
5259 	  flags |= SECTION_BSS;
5262 	case SECCAT_RODATA_MERGE_STR:
5263 	case SECCAT_RODATA_MERGE_STR_INIT:
5264 	case SECCAT_RODATA_MERGE_CONST:
5268 	case SECCAT_SRODATA:
5275 	  /* We don't split these for medium model.  Place them into
5276 	     default sections and hope for best.  */
5281 	  /* We might get called with string constants, but get_named_section
5282 	     doesn't like them as they are not DECLs.  Also, we need to set
5283 	     flags in that case.  */
5285 	    return get_section (sname, flags, NULL);
5286 	  return get_named_section (decl, sname, reloc);
5289   return default_elf_select_section (decl, reloc, align);
/* TARGET_SECTION_TYPE_FLAGS for x86-64 ELF: start from the default
   flags and add SECTION_RELRO for the read-only large-data relocation
   sections, and SECTION_BSS for the large-BSS family.
   NOTE(review): the strncmp lengths look off by one — ".lbss." is
   6 characters but 5 is passed, and ".gnu.linkonce.lb." is 17 but 16
   is passed, so the final character of each prefix is never checked
   (e.g. ".lbssX" would match).  Likely should be sizeof(...)-1;
   confirm against upstream before changing.
   NOTE(review): the function braces and `return flags;' are elided in
   this listing.  */
5292 /* Select a set of attributes for section NAME based on the properties
5293    of DECL and whether or not RELOC indicates that DECL's initializer
5294    might contain runtime relocations.  */
5296 static unsigned int ATTRIBUTE_UNUSED
5297 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5299   unsigned int flags = default_section_type_flags (decl, name, reloc);
     /* Only unnamed-decl (linker-synthesized) lookups of the large
	relro sections get the extra RELRO bit here.  */
5301   if (decl == NULL_TREE
5302       && (strcmp (name, ".ldata.rel.ro") == 0
5303 	  || strcmp (name, ".ldata.rel.ro.local") == 0))
5304     flags |= SECTION_RELRO;
5306   if (strcmp (name, ".lbss") == 0
5307       || strncmp (name, ".lbss.", 5) == 0
5308       || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
5309     flags |= SECTION_BSS;
/* TARGET_ASM_UNIQUE_SECTION for x86-64 ELF: give medium-model large
   decls a unique ".l"-prefixed (or ".gnu.linkonce.l*") section name of
   the form PREFIX.MANGLED_NAME; other decls use default_unique_section.
   NOTE(review): `case SECCAT_BSS:' style labels, `break;' statements,
   the `char *string' declaration and some braces are elided in this
   listing.  */
5314 /* Build up a unique section name, expressed as a
5315    STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5316    RELOC indicates whether the initial value of EXP requires
5317    link-time relocations.  */
5319 static void ATTRIBUTE_UNUSED
5320 x86_64_elf_unique_section (tree decl, int reloc)
5322   if (ix86_in_large_data_p (decl))
5324       const char *prefix = NULL;
5325       /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
5326       bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5328       switch (categorize_decl_for_section (decl, reloc))
5331 	case SECCAT_DATA_REL:
5332 	case SECCAT_DATA_REL_LOCAL:
5333 	case SECCAT_DATA_REL_RO:
5334 	case SECCAT_DATA_REL_RO_LOCAL:
     /* Short two-letter prefixes are the .gnu.linkonce variants.  */
5335 	  prefix = one_only ? ".ld" : ".ldata";
5338 	  prefix = one_only ? ".lb" : ".lbss";
5341 	case SECCAT_RODATA_MERGE_STR:
5342 	case SECCAT_RODATA_MERGE_STR_INIT:
5343 	case SECCAT_RODATA_MERGE_CONST:
5344 	  prefix = one_only ? ".lr" : ".lrodata";
5346 	case SECCAT_SRODATA:
5353 	  /* We don't split these for medium model.  Place them into
5354 	     default sections and hope for best.  */
5359 	  const char *name, *linkonce;
5362 	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5363 	  name = targetm.strip_name_encoding (name);
5365 	  /* If we're using one_only, then there needs to be a .gnu.linkonce
5366 	     prefix to the section name.  */
5367 	  linkonce = one_only ? ".gnu.linkonce" : "";
5369 	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5371 	  set_decl_section_name (decl, string);
5375   default_unique_section (decl, reloc);
5378 #ifdef COMMON_ASM_OP
/* Emit a common (uninitialized, external-linkage) symbol NAME of SIZE
   bytes aligned to ALIGN bits.  Medium-model objects above the
   -mlarge-data-threshold use the .largecomm directive instead of the
   regular COMMON_ASM_OP.
   NOTE(review): the `else' before the COMMON_ASM_OP branch and the
   function braces are elided in this listing.  The threshold compare
   casts ix86_section_threshold to unsigned int while SIZE is unsigned
   HOST_WIDE_INT — presumably intentional truncation; confirm.  */
5379 /* This says how to output assembler code to declare an
5380    uninitialized external linkage data object.
5382    For medium model x86-64 we need to use .largecomm opcode for
5385 x86_elf_aligned_common (FILE *file,
5386 			const char *name, unsigned HOST_WIDE_INT size,
5389   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5390       && size > (unsigned int)ix86_section_threshold)
5391     fputs ("\t.largecomm\t", file);
5393     fputs (COMMON_ASM_OP, file);
5394   assemble_name (file, name);
5395   fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5396 	   size, align / BITS_PER_UNIT);
/* ASM_OUTPUT_ALIGNED_BSS worker: place DECL/NAME of SIZE bytes in the
   BSS section (or .lbss for large medium-model objects), emit the
   alignment, declare or label the object, and reserve the space.
   A zero SIZE still reserves one byte so the symbol has an address.
   NOTE(review): the `else' between the two switch_to_section calls and
   the function braces are elided in this listing.  */
5400 /* Utility function for targets to use in implementing
5401    ASM_OUTPUT_ALIGNED_BSS.  */
5404 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5405 			unsigned HOST_WIDE_INT size, int align)
5407   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5408       && size > (unsigned int)ix86_section_threshold)
5409     switch_to_section (get_named_section (decl, ".lbss", 0));
5411     switch_to_section (bss_section);
5412   ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5413 #ifdef ASM_DECLARE_OBJECT_NAME
5414   last_assemble_variable_decl = decl;
5415   ASM_DECLARE_OBJECT_NAME (file, name, decl);
5417   /* Standard thing is just output label for the object.  */
5418   ASM_OUTPUT_LABEL (file, name);
5419 #endif /* ASM_DECLARE_OBJECT_NAME */
5420   ASM_OUTPUT_SKIP (file, size ? size : 1);
/* Return true if stack allocations must be probed on this target.
   Essentially TARGET_STACK_PROBE, except disabled when -fstack-check's
   static builtin checking already probes the stack, to avoid double
   probing.  NOTE(review): the `return false;' of the early-out branch
   and the function braces are elided in this listing.  */
5423 /* Decide whether we must probe the stack before any space allocation
5424    on this target.  It's essentially TARGET_STACK_PROBE except when
5425    -fstack-check causes the stack to be already probed differently.  */
5428 ix86_target_stack_probe (void)
5430   /* Do not probe the stack twice if static stack checking is enabled.  */
5431   if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5434   return TARGET_STACK_PROBE;
/* TARGET_FUNCTION_OK_FOR_SIBCALL hook: return true if the call in EXP
   (to DECL, when known) may be compiled as a sibling (tail) call.
   Rejects: PIC calls that need a live PLT register, callers needing
   extra stack alignment, mismatched return-value locations (notably
   x87 stack returns), MS-ABI callers targeting SYSV-ABI callees, and
   indirect/dllimport calls whose regparm usage leaves no call-clobbered
   register free for the target address.
   NOTE(review): the declarations of `a' and `b', various `return
   false;' lines, the `if (!decl ...)' guard before the indirect-call
   handling, and several braces are elided in this listing.  */
5437 /* Decide whether we can make a sibling call to a function.  DECL is the
5438    declaration of the function being targeted by the call and EXP is the
5439    CALL_EXPR representing the call.  */
5442 ix86_function_ok_for_sibcall (tree decl, tree exp)
5444   tree type, decl_or_type;
5447   /* If we are generating position-independent code, we cannot sibcall
5448      optimize any indirect call, or a direct call to a global function,
5449      as the PLT requires %ebx be live. (Darwin does not have a PLT.)  */
5453       && (!decl || !targetm.binds_local_p (decl)))
5456   /* If we need to align the outgoing stack, then sibcalling would
5457      unalign the stack, which may break the called function.  */
5458   if (ix86_minimum_incoming_stack_boundary (true)
5459       < PREFERRED_STACK_BOUNDARY)
     /* Direct call with a known decl: use its declared type.  */
5464       decl_or_type = decl;
5465       type = TREE_TYPE (decl);
5469       /* We're looking at the CALL_EXPR, we need the type of the function.  */
5470       type = CALL_EXPR_FN (exp);		/* pointer expression */
5471       type = TREE_TYPE (type);			/* pointer type */
5472       type = TREE_TYPE (type);			/* function type */
5473       decl_or_type = type;
5476   /* Check that the return value locations are the same.  Like
5477      if we are returning floats on the 80387 register stack, we cannot
5478      make a sibcall from a function that doesn't return a float to a
5479      function that does or, conversely, from a function that does return
5480      a float to a function that doesn't; the necessary stack adjustment
5481      would not be executed.  This is also the place we notice
5482      differences in the return value ABI.  Note that it is ok for one
5483      of the functions to have void return type as long as the return
5484      value of the other is passed in a register.  */
5485   a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5486   b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
     /* x87 stack returns must match exactly; the fp-stack pop protocol
	cannot be adjusted by the caller.  */
5488   if (STACK_REG_P (a) || STACK_REG_P (b))
5490       if (!rtx_equal_p (a, b))
5493   else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5495   else if (!rtx_equal_p (a, b))
5500   /* The SYSV ABI has more call-clobbered registers;
5501      disallow sibcalls from MS to SYSV.  */
5502   if (cfun->machine->call_abi == MS_ABI
5503       && ix86_function_type_abi (type) == SYSV_ABI)
5508       /* If this call is indirect, we'll need to be able to use a
5509 	 call-clobbered register for the address of the target function.
5510 	 Make sure that all such registers are not used for passing
5511 	 parameters.  Note that DLLIMPORT functions are indirect.  */
5513 	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5515 	  if (ix86_function_regparm (type, NULL) >= 3)
5517 	      /* ??? Need to count the actual number of registers to be used,
5518 		 not the possible number of registers.  Fix later.  */
5524   /* Otherwise okay.  That also includes certain types of indirect calls.  */
5528 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5529 and "sseregparm" calling convention attributes;
5530 arguments as in struct attribute_spec.handler. */
/* attribute_spec.handler for the IA-32 calling-convention attributes:
   validates the attribute target, rejects mutually exclusive
   combinations (each error branch below), and range-checks the
   integer argument of "regparm" against REGPARM_MAX.  Sets
   *no_add_attrs when the attribute must be dropped.
   (Listing is sparsely sampled; interior lines are not shown.)  */
5533 ix86_handle_cconv_attribute (tree *node, tree name,
5538   if (TREE_CODE (*node) != FUNCTION_TYPE
5539       && TREE_CODE (*node) != METHOD_TYPE
5540       && TREE_CODE (*node) != FIELD_DECL
5541       && TREE_CODE (*node) != TYPE_DECL)
5543       warning (OPT_Wattributes, "%qE attribute only applies to functions",
5545       *no_add_attrs = true;
5549   /* Can combine regparm with all attributes but fastcall, and thiscall.  */
5550   if (is_attribute_p ("regparm", name))
5554       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5556 	  error ("fastcall and regparm attributes are not compatible");
5559       if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
/* NOTE(review): the diagnostic below says "regparam" -- a typo for
   "regparm" in the user-visible message (string literal, so it is
   deliberately left untouched here).  */
5561 	  error ("regparam and thiscall attributes are not compatible");
5564       cst = TREE_VALUE (args);
5565       if (TREE_CODE (cst) != INTEGER_CST)
5567 	  warning (OPT_Wattributes,
5568 		   "%qE attribute requires an integer constant argument",
5570 	  *no_add_attrs = true;
5572       else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5574 	  warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5576 	  *no_add_attrs = true;
5584       /* Do not warn when emulating the MS ABI.  */
5585       if ((TREE_CODE (*node) != FUNCTION_TYPE
5586 	   && TREE_CODE (*node) != METHOD_TYPE)
5587 	  || ix86_function_type_abi (*node) != MS_ABI)
5588 	warning (OPT_Wattributes, "%qE attribute ignored",
5590       *no_add_attrs = true;
5594   /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
5595   if (is_attribute_p ("fastcall", name))
5597       if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5599 	  error ("fastcall and cdecl attributes are not compatible");
5601       if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5603 	  error ("fastcall and stdcall attributes are not compatible");
5605       if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5607 	  error ("fastcall and regparm attributes are not compatible");
5609       if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5611 	  error ("fastcall and thiscall attributes are not compatible");
5615   /* Can combine stdcall with fastcall (redundant), regparm and
5617   else if (is_attribute_p ("stdcall", name))
5619       if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5621 	  error ("stdcall and cdecl attributes are not compatible");
5623       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5625 	  error ("stdcall and fastcall attributes are not compatible");
5627       if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5629 	  error ("stdcall and thiscall attributes are not compatible");
5633   /* Can combine cdecl with regparm and sseregparm.  */
5634   else if (is_attribute_p ("cdecl", name))
5636       if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5638 	  error ("stdcall and cdecl attributes are not compatible");
5640       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5642 	  error ("fastcall and cdecl attributes are not compatible");
5644       if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5646 	  error ("cdecl and thiscall attributes are not compatible");
5649   else if (is_attribute_p ("thiscall", name))
5651       if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
/* NOTE(review): "none class-method" is ungrammatical ("non
   class-method" intended); string literal left as-is.  */
5652 	warning (OPT_Wattributes, "%qE attribute is used for none class-method",
5654       if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5656 	  error ("stdcall and thiscall attributes are not compatible");
5658       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5660 	  error ("fastcall and thiscall attributes are not compatible");
5662       if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5664 	  error ("cdecl and thiscall attributes are not compatible");
5668   /* Can combine sseregparm with all attributes.  */
5673 /* The transactional memory builtins are implicitly regparm or fastcall
5674    depending on the ABI.  Override the generic do-nothing attribute that
5675    these builtins were declared with, and replace it with one of the two
5676    attributes that we expect elsewhere.  */
/* Handler: drops the placeholder attribute and re-runs decl_attributes
   with either "fastcall" (Windows-ish targets, guessed via
   CHECK_STACK_LIMIT) or "regparm(2)" instead.  */
5679 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5680 				  int flags, bool *no_add_attrs)
5684   /* In no case do we want to add the placeholder attribute.  */
5685   *no_add_attrs = true;
5687   /* The 64-bit ABI is unchanged for transactional memory.  */
5691   /* ??? Is there a better way to validate 32-bit windows?  We have
5692      cfun->machine->call_abi, but that seems to be set only for 64-bit.  */
5693   if (CHECK_STACK_LIMIT > 0)
5694     alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
/* Non-Windows path: build the attribute list for regparm(2).  */
5697       alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5698       alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5700   decl_attributes (node, alt, flags);
5705 /* This function determines from TYPE the calling-convention.  */
/* Returns a bitmask of IX86_CALLCVT_* flags: the explicit attribute
   (cdecl/stdcall/fastcall/thiscall) if present, OR'ed with
   REGPARM/SSEREGPARM where those are allowed, falling back to the
   -mrtd (TARGET_RTD) default, the implicit thiscall for MS-ABI
   methods, or plain cdecl.  */
5708 ix86_get_callcvt (const_tree type)
5710   unsigned int ret = 0;
5715     return IX86_CALLCVT_CDECL;
5717   attrs = TYPE_ATTRIBUTES (type);
5718   if (attrs != NULL_TREE)
5720       if (lookup_attribute ("cdecl", attrs))
5721 	ret |= IX86_CALLCVT_CDECL;
5722       else if (lookup_attribute ("stdcall", attrs))
5723 	ret |= IX86_CALLCVT_STDCALL;
5724       else if (lookup_attribute ("fastcall", attrs))
5725 	ret |= IX86_CALLCVT_FASTCALL;
5726       else if (lookup_attribute ("thiscall", attrs))
5727 	ret |= IX86_CALLCVT_THISCALL;
5729       /* Regparam isn't allowed for thiscall and fastcall.  */
5730       if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5732 	  if (lookup_attribute ("regparm", attrs))
5733 	    ret |= IX86_CALLCVT_REGPARM;
5734 	  if (lookup_attribute ("sseregparm", attrs))
5735 	    ret |= IX86_CALLCVT_SSEREGPARM;
/* An explicit base convention was found -- the (unsampled) return
   here hands it back together with the regparm bits.  */
5738       if (IX86_BASE_CALLCVT(ret) != 0)
5742   is_stdarg = stdarg_p (type);
5743   if (TARGET_RTD && !is_stdarg)
5744     return IX86_CALLCVT_STDCALL | ret;
5748       || TREE_CODE (type) != METHOD_TYPE
5749       || ix86_function_type_abi (type) != MS_ABI)
5750     return IX86_CALLCVT_CDECL | ret;
5752   return IX86_CALLCVT_THISCALL;
5755 /* Return 0 if the attributes for two types are incompatible, 1 if they
5756    are compatible, and 2 if they are nearly compatible (which causes a
5757    warning to be generated).  */
/* Compatibility is decided by comparing the IX86_CALLCVT_* bitmasks of
   both function types and their regparm counts.  */
5760 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5762   unsigned int ccvt1, ccvt2;
5764   if (TREE_CODE (type1) != FUNCTION_TYPE
5765       && TREE_CODE (type1) != METHOD_TYPE)
5768   ccvt1 = ix86_get_callcvt (type1);
5769   ccvt2 = ix86_get_callcvt (type2);
5772   if (ix86_function_regparm (type1, NULL)
5773       != ix86_function_regparm (type2, NULL))
5779 /* Return the regparm value for a function with the indicated TYPE and DECL.
5780    DECL may be NULL when calling function indirectly
5781    or considering a libcall.  */
/* 64-bit returns the ABI's fixed register count; 32-bit starts from
   -mregparm / the "regparm" attribute, then may be raised for local
   (IPA-visible) functions whose signature we are free to change.  */
5784 ix86_function_regparm (const_tree type, const_tree decl)
5791     return (ix86_function_type_abi (type) == SYSV_ABI
5792 	    ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5793   ccvt = ix86_get_callcvt (type);
5794   regparm = ix86_regparm;
5796   if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5798       attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5801 	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5805   else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5807   else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5810   /* Use register calling convention for local functions when possible.  */
5812       && TREE_CODE (decl) == FUNCTION_DECL)
5814       cgraph_node *target = cgraph_node::get (decl);
5816 	target = target->function_symbol ();
5818       /* Caller and callee must agree on the calling convention, so
5819 	 checking here just optimize means that with
5820 	 __attribute__((optimize (...))) caller could use regparm convention
5821 	 and callee not, or vice versa.  Instead look at whether the callee
5822 	 is optimized or not.  */
5823       if (target && opt_for_fn (target->decl, optimize)
5824 	  && !(profile_flag && !flag_fentry))
5826 	  cgraph_local_info *i = &target->local;
5827 	  if (i && i->local && i->can_change_signature)
5829 	      int local_regparm, globals = 0, regno;
5831 	      /* Make sure no regparm register is taken by a
5832 		 fixed register variable.  */
5833 	      for (local_regparm = 0; local_regparm < REGPARM_MAX;
5835 		if (fixed_regs[local_regparm])
5838 	      /* We don't want to use regparm(3) for nested functions as
5839 		 these use a static chain pointer in the third argument.  */
5840 	      if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
5843 	      /* Save a register for the split stack.  */
5844 	      if (local_regparm == 3 && flag_split_stack)
5847 	      /* Each fixed register usage increases register pressure,
5848 		 so less registers should be used for argument passing.
5849 		 This functionality can be overridden by an explicit
5851 	      for (regno = AX_REG; regno <= DI_REG; regno++)
5852 		if (fixed_regs[regno])
/* Reduce the local regparm count by the number of globally fixed
   registers counted above.  */
5856 		= globals < local_regparm ? local_regparm - globals : 0;
5858 	      if (local_regparm > regparm)
5859 		regparm = local_regparm;
5867 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5868    DFmode (2) arguments in SSE registers for a function with the
5869    indicated TYPE and DECL.  DECL may be NULL when calling function
5870    indirectly or considering a libcall.  Otherwise return 0.  */
/* 32-bit only (asserted below).  WARN controls whether the mismatch
   diagnostics are emitted.  */
5873 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5875   gcc_assert (!TARGET_64BIT);
5877   /* Use SSE registers to pass SFmode and DFmode arguments if requested
5878      by the sseregparm attribute.  */
5879   if (TARGET_SSEREGPARM
5880       || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5887 	      error ("calling %qD with attribute sseregparm without "
5888 		     "SSE/SSE2 enabled", decl);
5890 	      error ("calling %qT with attribute sseregparm without "
5891 		     "SSE/SSE2 enabled", type);
5902       cgraph_node *target = cgraph_node::get (decl);
5904 	target = target->function_symbol ();
5906       /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5907 	 (and DFmode for SSE2) arguments in SSE registers.  */
5909 	  /* TARGET_SSE_MATH */
5910 	  && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
5911 	  && opt_for_fn (target->decl, optimize)
5912 	  && !(profile_flag && !flag_fentry))
5914 	  cgraph_local_info *i = &target->local;
5915 	  if (i && i->local && i->can_change_signature)
5917 	      /* Refuse to produce wrong code when local function with SSE enabled
5918 		 is called from SSE disabled function.
5919 		 We may work hard to work out these scenarios but hopefully
5920 		 it does not matter in practice.  */
5921 	      if (!TARGET_SSE && warn)
/* NOTE(review): "caling" is a typo for "calling" in this user-visible
   diagnostic (string literal, left unchanged here).  */
5923 		  error ("calling %qD with SSE caling convention without "
5924 			 "SSE/SSE2 enabled", decl);
/* 2 when the callee was compiled with SSE2 (DFmode also in SSE regs),
   otherwise 1 (SFmode only).  */
5927 	      return TARGET_SSE2_P (target_opts_for_fn (target->decl)
5928 				    ->x_ix86_isa_flags) ? 2 : 1;
5935 /* Return true if EAX is live at the start of the function.  Used by
5936    ix86_expand_prologue to determine if we need special help before
5937    calling allocate_stack_worker.  */
5940 ix86_eax_live_at_start_p (void)
5942   /* Cheat.  Don't bother working forward from ix86_function_regparm
5943      to the function type to whether an actual argument is located in
5944      eax.  Instead just look at cfg info, which is still close enough
5945      to correct at this point.  This gives false positives for broken
5946      functions that might use uninitialized data that happens to be
5947      allocated in eax, but who cares?  */
/* Register 0 is EAX; query liveness out of the entry block.  */
5948   return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
/* Return true when the callee should NOT pop the hidden aggregate-return
   pointer: honours the "callee_pop_aggregate_return" attribute argument,
   then the 32-bit MS-ABI default, then KEEP_AGGREGATE_RETURN_POINTER.  */
5952 ix86_keep_aggregate_return_pointer (tree fntype)
5958       attr = lookup_attribute ("callee_pop_aggregate_return",
5959 			       TYPE_ATTRIBUTES (fntype));
5961 	return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5963       /* For 32-bit MS-ABI the default is to keep aggregate
5965       if (ix86_function_type_abi (fntype) == MS_ABI)
5968   return KEEP_AGGREGATE_RETURN_POINTER != 0;
5971 /* Value is the number of bytes of arguments automatically
5972    popped when returning from a subroutine call.
5973    FUNDECL is the declaration node of the function (as a tree),
5974    FUNTYPE is the data type of the function (as a tree),
5975    or for a library call it is an identifier node for the subroutine name.
5976    SIZE is the number of bytes of arguments passed on the stack.
5978    On the 80386, the RTD insn may be used to pop them if the number
5979    of args is fixed, but if the number is variable then the caller
5980    must pop them all.  RTD can't be used for library calls now
5981    because the library is compiled with the Unix compiler.
5982    Use of RTD is a selectable option, since it is incompatible with
5983    standard Unix calling sequences.  If the option is not selected,
5984    the caller must always pop the args.
5986    The attribute stdcall is equivalent to RTD on a per module basis.  */
5989 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5993   /* None of the 64-bit ABIs pop arguments.  */
5997   ccvt = ix86_get_callcvt (funtype);
/* stdcall/fastcall/thiscall callees pop their own (fixed) arguments.  */
5999   if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
6000 	       | IX86_CALLCVT_THISCALL)) != 0
6001       && ! stdarg_p (funtype))
6004   /* Lose any fake structure return argument if it is passed on the stack.  */
6005   if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
6006       && !ix86_keep_aggregate_return_pointer (funtype))
6008       int nregs = ix86_function_regparm (funtype, fundecl);
/* Pop just the hidden return-slot pointer (one word).  */
6010 	return GET_MODE_SIZE (Pmode);
6016 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook.  */
/* Rejects combined insns whose propagated hard-register operands (or
   pre-AVX misaligned vector memory operands) satisfy no enabled
   constraint alternative.  */
6019 ix86_legitimate_combined_insn (rtx_insn *insn)
6021   /* Check operand constraints in case hard registers were propagated
6022      into insn pattern.  This check prevents combine pass from
6023      generating insn patterns with invalid hard register operands.
6024      These invalid insns can eventually confuse reload to error out
6025      with a spill failure.  See also PRs 46829 and 46843.  */
/* The assignment inside the condition is deliberate: it caches the
   recog result in INSN_CODE while testing recognizability.  */
6026   if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
6030       extract_insn (insn);
6031       preprocess_constraints (insn);
6033       int n_operands = recog_data.n_operands;
6034       int n_alternatives = recog_data.n_alternatives;
6035       for (i = 0; i < n_operands; i++)
6037 	  rtx op = recog_data.operand[i];
6038 	  machine_mode mode = GET_MODE (op);
6039 	  const operand_alternative *op_alt;
6044 	  /* For pre-AVX disallow unaligned loads/stores where the
6045 	     instructions don't support it.  */
6047 	      && VECTOR_MODE_P (GET_MODE (op))
6048 	      && misaligned_operand (op, GET_MODE (op)))
6050 	      int min_align = get_attr_ssememalign (insn);
6055 	  /* A unary operator may be accepted by the predicate, but it
6056 	     is irrelevant for matching constraints.  */
6060 	  if (GET_CODE (op) == SUBREG)
6062 	      if (REG_P (SUBREG_REG (op))
6063 		  && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
6064 		offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
6065 					      GET_MODE (SUBREG_REG (op)),
6068 	      op = SUBREG_REG (op);
/* Only hard-register operands need the constraint re-check.  */
6071 	  if (!(REG_P (op) && HARD_REGISTER_P (op)))
6074 	  op_alt = recog_op_alt;
6076 	  /* Operand has no constraints, anything is OK.  */
6077 	  win = !n_alternatives;
6079 	  alternative_mask preferred = get_preferred_alternatives (insn);
6080 	  for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
6082 	      if (!TEST_BIT (preferred, j))
6084 	      if (op_alt[i].anything_ok
6085 		  || (op_alt[i].matches != -1
6087 		      (recog_data.operand[i],
6088 		       recog_data.operand[op_alt[i].matches]))
6089 		  || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
6104 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
/* AddressSanitizer shadow-memory offset: 1<<44 (LP64 Mach-O),
   0x7fff8000 (other LP64), or 1<<29 (32-bit/x32).  */
6106 static unsigned HOST_WIDE_INT
6107 ix86_asan_shadow_offset (void)
6109   return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
6110 				     : HOST_WIDE_INT_C (0x7fff8000))
6111 		     : (HOST_WIDE_INT_1 << 29);
6114 /* Argument support functions.  */
6116 /* Return true when register may be used to pass function parameters.  */
/* Covers bound registers (MPX), 32-bit integer/MMX/SSE parameter
   registers, and the per-ABI 64-bit integer parameter register sets.  */
6118 ix86_function_arg_regno_p (int regno)
6121   const int *parm_regs;
6123   if (TARGET_MPX && BND_REGNO_P (regno))
6129       return (regno < REGPARM_MAX
6130 	      || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
6132     return (regno < REGPARM_MAX
6133 	    || (TARGET_MMX && MMX_REGNO_P (regno)
6134 		&& (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
6135 	    || (TARGET_SSE && SSE_REGNO_P (regno)
6136 		&& (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
6139   if (TARGET_SSE && SSE_REGNO_P (regno)
6140       && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
6143   /* TODO: The function should depend on current function ABI but
6144      builtins.c would need updating then.  Therefore we use the
6147   /* RAX is used as hidden argument to va_arg functions.  */
6148   if (ix86_abi == SYSV_ABI && regno == AX_REG)
6151   if (ix86_abi == MS_ABI)
6152     parm_regs = x86_64_ms_abi_int_parameter_registers;
6154     parm_regs = x86_64_int_parameter_registers;
/* Scan the ABI's integer parameter register table for REGNO.  */
6155   for (i = 0; i < (ix86_abi == MS_ABI
6156 		   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
6157     if (regno == parm_regs[i])
6162 /* Return if we do not know how to pass TYPE solely in registers.  */
6165 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
6167   if (must_pass_in_stack_var_size_or_pad (mode, type))
6170   /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
6171      The layout_type routine is crafty and tries to trick us into passing
6172      currently unsupported vector types on the stack by using TImode.  */
6173   return (!TARGET_64BIT && mode == TImode
6174 	  && type && TREE_CODE (type) != VECTOR_TYPE);
6177 /* It returns the size, in bytes, of the area reserved for arguments passed
6178    in registers for the function represented by fndecl dependent to the used
/* FNDECL may be a FUNCTION_DECL or a function type; only 64-bit MS-ABI
   reserves shadow space (returned in the unsampled lines below).  */
6181 ix86_reg_parm_stack_space (const_tree fndecl)
6183   enum calling_abi call_abi = SYSV_ABI;
6184   if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6185     call_abi = ix86_function_abi (fndecl);
6187     call_abi = ix86_function_type_abi (fndecl);
6188   if (TARGET_64BIT && call_abi == MS_ABI)
6193 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
/* Starts from the global ix86_abi and flips it when the type carries a
   "ms_abi" or "sysv_abi" attribute; ms_abi is rejected on x32.  */
6196 ix86_function_type_abi (const_tree fntype)
6198   if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6200       enum calling_abi abi = ix86_abi;
6201       if (abi == SYSV_ABI)
6203 	  if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6207 		  static bool warned = false;
6210 		      error ("X32 does not support ms_abi attribute");
6217       else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6224 /* We add this as a workaround in order to use libc_has_function
/* Thin forwarder to the generic targetm.libc_has_function hook.  */
6227 ix86_libc_has_function (enum function_class fn_class)
6229   return targetm.libc_has_function (fn_class);
/* Return true when FN carries the "ms_hook_prologue" attribute; the
   attribute is rejected (with an error) on nested functions.  */
6233 ix86_function_ms_hook_prologue (const_tree fn)
6235   if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6237       if (decl_function_context (fn) != NULL_TREE)
6238 	error_at (DECL_SOURCE_LOCATION (fn),
6239 		  "ms_hook_prologue is not compatible with nested function");
/* Calling ABI (SYSV_ABI/MS_ABI) of the function declaration FNDECL,
   derived from its type.  */
6246 static enum calling_abi
6247 ix86_function_abi (const_tree fndecl)
6251   return ix86_function_type_abi (TREE_TYPE (fndecl));
6254 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
/* Reads the cached per-function ABI set by ix86_call_abi_override.  */
6257 ix86_cfun_abi (void)
6261   return cfun->machine->call_abi;
6264 /* Write the extra assembler code needed to declare a function properly.  */
/* For ms_hook_prologue functions: pads with 0xCC filler before the
   label and emits the hot-patch magic bytes after it (a nop-equivalent
   8-byte lea on 64-bit, movl.s/push/movl.s on 32-bit).  */
6267 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6270   bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6274       int i, filler_count = (TARGET_64BIT ? 32 : 16);
6275       unsigned int filler_cc = 0xcccccccc;
6277       for (i = 0; i < filler_count; i += 4)
6278 	fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6281 #ifdef SUBTARGET_ASM_UNWIND_INIT
6282   SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6285   ASM_OUTPUT_LABEL (asm_out_file, fname);
6287   /* Output magic byte marker, if hot-patch attribute is set.  */
6292 	  /* leaq [%rsp + 0], %rsp  */
6293 	  asm_fprintf (asm_out_file, ASM_BYTE
6294 		       "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6298 	  /* movl.s %edi, %edi
6300 	     movl.s %esp, %ebp */
6301 	  asm_fprintf (asm_out_file, ASM_BYTE
6302 		       "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6308 extern void init_regs (void);
6310 /* Implementation of call abi switching target hook.  Specific to FNDECL
6311    the specific call register sets are set.  See also
6312    ix86_conditional_register_usage for more details.  */
/* Caches the ABI of FNDECL (or the global default when NULL) in
   cfun->machine->call_abi.  */
6314 ix86_call_abi_override (const_tree fndecl)
6316   if (fndecl == NULL_TREE)
6317     cfun->machine->call_abi = ix86_abi;
6319     cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6322 /* 64-bit MS and SYSV ABI have different set of call used registers.  Avoid
6323    expensive re-initialization of init_regs each time we switch function context
6324    since this is needed only during RTL expansion.  */
/* Re-runs init_regs only when the cached call-used-register state
   (probed via SI_REG) disagrees with the current function's ABI.  */
6326 ix86_maybe_switch_abi (void)
6329       call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6333 /* Return 1 if pseudo register should be created and used to hold
6334    GOT address for PIC code.  */
6336 ix86_use_pseudo_pic_reg (void)
/* Condition continues on unsampled lines; small-PIC code model is one
   of the qualifying cases.  */
6339       && (ix86_cmodel == CM_SMALL_PIC
6346 /* Initialize large model PIC register.  */
/* Emits the large-model GOT setup: set_rip/set_got_offset pair around a
   preserved label, then adds the offset into pic_offset_table_rtx.
   TMP_REGNO is a scratch hard register distinct from the PIC reg.  */
6349 ix86_init_large_pic_reg (unsigned int tmp_regno)
6351   rtx_code_label *label;
6354   gcc_assert (Pmode == DImode);
6355   label = gen_label_rtx ();
6357   LABEL_PRESERVE_P (label) = 1;
6358   tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
6359   gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
6360   emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6362   emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6363   emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6364 			    pic_offset_table_rtx, tmp_reg));
6367 /* Create and initialize PIC register if required.  */
/* Generates the set_got sequence (large-model variant on 64-bit when
   needed) and inserts it on the entry edge of the function.  */
6369 ix86_init_pic_reg (void)
6374   if (!ix86_use_pseudo_pic_reg ())
6381       if (ix86_cmodel == CM_LARGE_PIC)
6382 	ix86_init_large_pic_reg (R11_REG);
6384 	emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6388       /*  If there is future mcount call in the function it is more profitable
6389 	  to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM.  */
6390       rtx reg = crtl->profile
6391 		? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
6392 		: pic_offset_table_rtx;
6393       rtx insn = emit_insn (gen_set_got (reg));
6394       RTX_FRAME_RELATED_P (insn) = 1;
6396 	emit_move_insn (pic_offset_table_rtx, reg);
6397       add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
/* Splice the generated sequence onto the entry edge so it runs before
   any user code.  */
6403   entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6404   insert_insn_on_edge (seq, entry_edge);
6405   commit_one_edge_insertion (entry_edge);
6408 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6409    for a call to a function whose data type is FNTYPE.
6410    For a library call, FNTYPE is 0.  */
/* Resolves the callee's ABI (via cgraph for known local decls), sets
   up integer/SSE/MMX register budgets, stdarg/vararg flags, MPX bound
   register state, and the 32-bit fastcall/thiscall/regparm overrides.  */
6413 init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
6414 		      tree fntype,	/* tree ptr for function decl */
6415 		      rtx libname,	/* SYMBOL_REF of library name or 0 */
6419   struct cgraph_local_info *i = NULL;
6420   struct cgraph_node *target = NULL;
6422   memset (cum, 0, sizeof (*cum));
6426       target = cgraph_node::get (fndecl);
6429 	  target = target->function_symbol ();
6430 	  i = cgraph_node::local_info (target->decl);
6431 	  cum->call_abi = ix86_function_abi (target->decl);
6434 	cum->call_abi = ix86_function_abi (fndecl);
6437     cum->call_abi = ix86_function_type_abi (fntype);
6439   cum->caller = caller;
6441   /* Set up the number of registers to use for passing arguments.  */
6442   cum->nregs = ix86_regparm;
6445       cum->nregs = (cum->call_abi == SYSV_ABI
6446 		    ? X86_64_REGPARM_MAX
6447 		    : X86_64_MS_REGPARM_MAX);
6451       cum->sse_nregs = SSE_REGPARM_MAX;
6454 	  cum->sse_nregs = (cum->call_abi == SYSV_ABI
6455 			    ? X86_64_SSE_REGPARM_MAX
6456 			    : X86_64_MS_SSE_REGPARM_MAX);
6460     cum->mmx_nregs = MMX_REGPARM_MAX;
6461   cum->warn_avx512f = true;
6462   cum->warn_avx = true;
6463   cum->warn_sse = true;
6464   cum->warn_mmx = true;
6466   /* Because type might mismatch in between caller and callee, we need to
6467      use actual type of function for local calls.
6468      FIXME: cgraph_analyze can be told to actually record if function uses
6469      va_start so for local functions maybe_vaarg can be made aggressive
6471      FIXME: once typesystem is fixed, we won't need this code anymore.  */
6472   if (i && i->local && i->can_change_signature)
6473     fntype = TREE_TYPE (target->decl);
6474   cum->stdarg = stdarg_p (fntype);
6475   cum->maybe_vaarg = (fntype
6476 		      ? (!prototype_p (fntype) || stdarg_p (fntype))
/* MPX pointer-bounds passing state.  */
6479   cum->bnd_regno = FIRST_BND_REG;
6480   cum->bnds_in_bt = 0;
6481   cum->force_bnd_pass = 0;
6485       /* If there are variable arguments, then we won't pass anything
6486 	 in registers in 32-bit mode.  */
6487       if (stdarg_p (fntype))
6492 	  cum->warn_avx512f = false;
6493 	  cum->warn_avx = false;
6494 	  cum->warn_sse = false;
6495 	  cum->warn_mmx = false;
6499       /* Use ecx and edx registers if function has fastcall attribute,
6500 	 else look for regparm information.  */
6503 	  unsigned int ccvt = ix86_get_callcvt (fntype);
6504 	  if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6507 	      cum->fastcall = 1;	/* Same first register as in fastcall.  */
6509 	  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6515 	    cum->nregs = ix86_function_regparm (fntype, fndecl);
6518       /* Set up the number of SSE registers used for passing SFmode
6519 	 and DFmode arguments.  Warn for mismatching ABI.  */
6520       cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6524 /* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
6525    But in the case of vector types, it is some vector mode.
6527    When we have only some of our vector isa extensions enabled, then there
6528    are some modes for which vector_mode_supported_p is false.  For these
6529    modes, the generic vector support in gcc will choose some non-vector mode
6530    in order to implement the type.  By computing the natural mode, we'll
6531    select the proper ABI location for the operand and not depend on whatever
6532    the middle-end decides to do with these vector types.
6534    The middle-end can't deal with the vector types > 16 bytes.  In this
6535    case, we return the original mode and warn ABI change if CUM isn't
6538    If INT_RETURN is true, warn ABI change if the vector mode isn't
6539    available for function return value.  */
6542 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6545   machine_mode mode = TYPE_MODE (type);
6547   if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6549       HOST_WIDE_INT size = int_size_in_bytes (type);
6550       if ((size == 8 || size == 16 || size == 32 || size == 64)
6551 	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
6552 	  && TYPE_VECTOR_SUBPARTS (type) > 1)
6554 	  machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6556 	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6557 	    mode = MIN_MODE_VECTOR_FLOAT;
6559 	    mode = MIN_MODE_VECTOR_INT;
6561 	  /* Get the mode which has this inner mode and number of units.  */
6562 	  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6563 	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6564 		&& GET_MODE_INNER (mode) == innermode)
/* Each branch below emits the -Wpsabi ABI-change warning once per
   compilation (static `warnedX' flags), separately for argument and
   return-value contexts.  */
6566 		if (size == 64 && !TARGET_AVX512F)
6568 		    static bool warnedavx512f;
6569 		    static bool warnedavx512f_ret;
6571 		    if (cum && cum->warn_avx512f && !warnedavx512f)
6573 			if (warning (OPT_Wpsabi, "AVX512F vector argument "
6574 				     "without AVX512F enabled changes the ABI"))
6575 			  warnedavx512f = true;
6577 		    else if (in_return && !warnedavx512f_ret)
6579 			if (warning (OPT_Wpsabi, "AVX512F vector return "
6580 				     "without AVX512F enabled changes the ABI"))
6581 			  warnedavx512f_ret = true;
6584 		    return TYPE_MODE (type);
6586 		else if (size == 32 && !TARGET_AVX)
6588 		    static bool warnedavx;
6589 		    static bool warnedavx_ret;
6591 		    if (cum && cum->warn_avx && !warnedavx)
6593 			if (warning (OPT_Wpsabi, "AVX vector argument "
6594 				     "without AVX enabled changes the ABI"))
6597 		    else if (in_return && !warnedavx_ret)
6599 			if (warning (OPT_Wpsabi, "AVX vector return "
6600 				     "without AVX enabled changes the ABI"))
6601 			  warnedavx_ret = true;
6604 		    return TYPE_MODE (type);
6606 		else if (((size == 8 && TARGET_64BIT) || size == 16)
6609 		    static bool warnedsse;
6610 		    static bool warnedsse_ret;
6612 		    if (cum && cum->warn_sse && !warnedsse)
6614 			if (warning (OPT_Wpsabi, "SSE vector argument "
6615 				     "without SSE enabled changes the ABI"))
6618 		    else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6620 			if (warning (OPT_Wpsabi, "SSE vector return "
6621 				     "without SSE enabled changes the ABI"))
6622 			  warnedsse_ret = true;
6625 		else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6627 		    static bool warnedmmx;
6628 		    static bool warnedmmx_ret;
6630 		    if (cum && cum->warn_mmx && !warnedmmx)
6632 			if (warning (OPT_Wpsabi, "MMX vector argument "
6633 				     "without MMX enabled changes the ABI"))
6636 		    else if (in_return && !warnedmmx_ret)
6638 			if (warning (OPT_Wpsabi, "MMX vector return "
6639 				     "without MMX enabled changes the ABI"))
6640 			  warnedmmx_ret = true;
6653 /* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
6654    this may not agree with the mode that the type system has chosen for the
6655    register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
6656    go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */
6659 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
6664   if (orig_mode != BLKmode)
6665     tmp = gen_rtx_REG (orig_mode, regno);
/* BLKmode: wrap the register in a one-element PARALLEL at offset 0.  */
6668       tmp = gen_rtx_REG (mode, regno);
6669       tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6670       tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6676 /* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
6677    of this code is to classify each 8bytes of incoming argument by the register
6678    class and assign registers accordingly.  */
6680 /* Return the union class of CLASS1 and CLASS2.
6681    See the x86-64 PS ABI for details.  */
/* Implements the psABI eightbyte-merging rules (numbered below) used by
   classify_argument.  */
6683 static enum x86_64_reg_class
6684 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6686   /* Rule #1: If both classes are equal, this is the resulting class.  */
6687   if (class1 == class2)
6690   /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6692   if (class1 == X86_64_NO_CLASS)
6694   if (class2 == X86_64_NO_CLASS)
6697   /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
6698   if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6699     return X86_64_MEMORY_CLASS;
6701   /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
6702   if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6703       || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6704     return X86_64_INTEGERSI_CLASS;
6705   if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6706       || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6707     return X86_64_INTEGER_CLASS;
6709   /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6711   if (class1 == X86_64_X87_CLASS
6712       || class1 == X86_64_X87UP_CLASS
6713       || class1 == X86_64_COMPLEX_X87_CLASS
6714       || class2 == X86_64_X87_CLASS
6715       || class2 == X86_64_X87UP_CLASS
6716       || class2 == X86_64_COMPLEX_X87_CLASS)
6717     return X86_64_MEMORY_CLASS;
6719   /* Rule #6: Otherwise class SSE is used.  */
6720   return X86_64_SSE_CLASS;
6723 /* Classify the argument of type TYPE and mode MODE.
6724 CLASSES will be filled by the register class used to pass each word
6725 of the operand. The number of words is returned. In case the parameter
6726 should be passed in memory, 0 is returned. As a special case for zero
6727 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6729 BIT_OFFSET is used internally for handling records and specifies offset
6730 of the offset in bits modulo 512 to avoid overflow cases.
6732 See the x86-64 PS ABI for details.
6736 classify_argument (machine_mode mode, const_tree type,
6737 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6739 HOST_WIDE_INT bytes =
6740 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6742 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6744 /* Variable sized entities are always passed/returned in memory. */
6748 if (mode != VOIDmode
6749 && targetm.calls.must_pass_in_stack (mode, type))
6752 if (type && AGGREGATE_TYPE_P (type))
6756 enum x86_64_reg_class subclasses[MAX_CLASSES];
6758 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6762 for (i = 0; i < words; i++)
6763 classes[i] = X86_64_NO_CLASS;
6765 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6766 signalize memory class, so handle it as special case. */
6769 classes[0] = X86_64_NO_CLASS;
6773 /* Classify each field of record and merge classes. */
6774 switch (TREE_CODE (type))
6777 /* And now merge the fields of structure. */
6778 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6780 if (TREE_CODE (field) == FIELD_DECL)
6784 if (TREE_TYPE (field) == error_mark_node)
6787 /* Bitfields are always classified as integer. Handle them
6788 early, since later code would consider them to be
6789 misaligned integers. */
6790 if (DECL_BIT_FIELD (field))
6792 for (i = (int_bit_position (field)
6793 + (bit_offset % 64)) / 8 / 8;
6794 i < ((int_bit_position (field) + (bit_offset % 64))
6795 + tree_to_shwi (DECL_SIZE (field))
6798 merge_classes (X86_64_INTEGER_CLASS,
6805 type = TREE_TYPE (field);
6807 /* Flexible array member is ignored. */
6808 if (TYPE_MODE (type) == BLKmode
6809 && TREE_CODE (type) == ARRAY_TYPE
6810 && TYPE_SIZE (type) == NULL_TREE
6811 && TYPE_DOMAIN (type) != NULL_TREE
6812 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6817 if (!warned && warn_psabi)
6820 inform (input_location,
6821 "the ABI of passing struct with"
6822 " a flexible array member has"
6823 " changed in GCC 4.4");
6827 num = classify_argument (TYPE_MODE (type), type,
6829 (int_bit_position (field)
6830 + bit_offset) % 512);
6833 pos = (int_bit_position (field)
6834 + (bit_offset % 64)) / 8 / 8;
6835 for (i = 0; i < num && (i + pos) < words; i++)
6837 merge_classes (subclasses[i], classes[i + pos]);
6844 /* Arrays are handled as small records. */
6847 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6848 TREE_TYPE (type), subclasses, bit_offset);
6852 /* The partial classes are now full classes. */
6853 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6854 subclasses[0] = X86_64_SSE_CLASS;
6855 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6856 && !((bit_offset % 64) == 0 && bytes == 4))
6857 subclasses[0] = X86_64_INTEGER_CLASS;
6859 for (i = 0; i < words; i++)
6860 classes[i] = subclasses[i % num];
6865 case QUAL_UNION_TYPE:
6866 /* Unions are similar to RECORD_TYPE but offset is always 0.
6868 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6870 if (TREE_CODE (field) == FIELD_DECL)
6874 if (TREE_TYPE (field) == error_mark_node)
6877 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6878 TREE_TYPE (field), subclasses,
6882 for (i = 0; i < num && i < words; i++)
6883 classes[i] = merge_classes (subclasses[i], classes[i]);
6894 /* When size > 16 bytes, if the first one isn't
6895 X86_64_SSE_CLASS or any other ones aren't
6896 X86_64_SSEUP_CLASS, everything should be passed in
6898 if (classes[0] != X86_64_SSE_CLASS)
6901 for (i = 1; i < words; i++)
6902 if (classes[i] != X86_64_SSEUP_CLASS)
6906 /* Final merger cleanup. */
6907 for (i = 0; i < words; i++)
6909 /* If one class is MEMORY, everything should be passed in
6911 if (classes[i] == X86_64_MEMORY_CLASS)
6914 /* The X86_64_SSEUP_CLASS should be always preceded by
6915 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6916 if (classes[i] == X86_64_SSEUP_CLASS
6917 && classes[i - 1] != X86_64_SSE_CLASS
6918 && classes[i - 1] != X86_64_SSEUP_CLASS)
6920 /* The first one should never be X86_64_SSEUP_CLASS. */
6921 gcc_assert (i != 0);
6922 classes[i] = X86_64_SSE_CLASS;
6925 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6926 everything should be passed in memory. */
6927 if (classes[i] == X86_64_X87UP_CLASS
6928 && (classes[i - 1] != X86_64_X87_CLASS))
6932 /* The first one should never be X86_64_X87UP_CLASS. */
6933 gcc_assert (i != 0);
6934 if (!warned && warn_psabi)
6937 inform (input_location,
6938 "the ABI of passing union with long double"
6939 " has changed in GCC 4.4");
6947 /* Compute alignment needed. We align all types to natural boundaries with
6948 exception of XFmode that is aligned to 64bits. */
6949 if (mode != VOIDmode && mode != BLKmode)
6951 int mode_alignment = GET_MODE_BITSIZE (mode);
6954 mode_alignment = 128;
6955 else if (mode == XCmode)
6956 mode_alignment = 256;
6957 if (COMPLEX_MODE_P (mode))
6958 mode_alignment /= 2;
6959 /* Misaligned fields are always returned in memory. */
6960 if (bit_offset % mode_alignment)
6964 /* for V1xx modes, just use the base mode */
6965 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6966 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6967 mode = GET_MODE_INNER (mode);
6969 /* Classification of atomic types. */
6974 classes[0] = X86_64_SSE_CLASS;
6977 classes[0] = X86_64_SSE_CLASS;
6978 classes[1] = X86_64_SSEUP_CLASS;
6988 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6990 /* Analyze last 128 bits only. */
6991 size = (size - 1) & 0x7f;
6995 classes[0] = X86_64_INTEGERSI_CLASS;
7000 classes[0] = X86_64_INTEGER_CLASS;
7003 else if (size < 64+32)
7005 classes[0] = X86_64_INTEGER_CLASS;
7006 classes[1] = X86_64_INTEGERSI_CLASS;
7009 else if (size < 64+64)
7011 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7019 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7023 /* OImode shouldn't be used directly. */
7028 if (!(bit_offset % 64))
7029 classes[0] = X86_64_SSESF_CLASS;
7031 classes[0] = X86_64_SSE_CLASS;
7034 classes[0] = X86_64_SSEDF_CLASS;
7037 classes[0] = X86_64_X87_CLASS;
7038 classes[1] = X86_64_X87UP_CLASS;
7041 classes[0] = X86_64_SSE_CLASS;
7042 classes[1] = X86_64_SSEUP_CLASS;
7045 classes[0] = X86_64_SSE_CLASS;
7046 if (!(bit_offset % 64))
7052 if (!warned && warn_psabi)
7055 inform (input_location,
7056 "the ABI of passing structure with complex float"
7057 " member has changed in GCC 4.4");
7059 classes[1] = X86_64_SSESF_CLASS;
7063 classes[0] = X86_64_SSEDF_CLASS;
7064 classes[1] = X86_64_SSEDF_CLASS;
7067 classes[0] = X86_64_COMPLEX_X87_CLASS;
7070 /* This modes is larger than 16 bytes. */
7078 classes[0] = X86_64_SSE_CLASS;
7079 classes[1] = X86_64_SSEUP_CLASS;
7080 classes[2] = X86_64_SSEUP_CLASS;
7081 classes[3] = X86_64_SSEUP_CLASS;
7089 classes[0] = X86_64_SSE_CLASS;
7090 classes[1] = X86_64_SSEUP_CLASS;
7091 classes[2] = X86_64_SSEUP_CLASS;
7092 classes[3] = X86_64_SSEUP_CLASS;
7093 classes[4] = X86_64_SSEUP_CLASS;
7094 classes[5] = X86_64_SSEUP_CLASS;
7095 classes[6] = X86_64_SSEUP_CLASS;
7096 classes[7] = X86_64_SSEUP_CLASS;
7104 classes[0] = X86_64_SSE_CLASS;
7105 classes[1] = X86_64_SSEUP_CLASS;
7113 classes[0] = X86_64_SSE_CLASS;
7119 gcc_assert (VECTOR_MODE_P (mode));
7124 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
7126 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
7127 classes[0] = X86_64_INTEGERSI_CLASS;
7129 classes[0] = X86_64_INTEGER_CLASS;
7130 classes[1] = X86_64_INTEGER_CLASS;
7131 return 1 + (bytes > 8);
7135 /* Examine the argument and return set number of register required in each
7136 class. Return true iff parameter should be passed in memory. */
/* NOTE(review): many lines of this function are elided in this view;
   the visible code tallies how many integer and SSE registers the
   classified argument consumes. */
7139 examine_argument (machine_mode mode, const_tree type, int in_return,
7140 int *int_nregs, int *sse_nregs)
7142 enum x86_64_reg_class regclass[MAX_CLASSES];
/* classify_argument fills REGCLASS with one class per eightbyte. */
7143 int n = classify_argument (mode, type, regclass, 0);
/* Walk the eightbyte classes and bump the matching register counter. */
7150 for (n--; n >= 0; n--)
7151 switch (regclass[n])
7153 case X86_64_INTEGER_CLASS:
7154 case X86_64_INTEGERSI_CLASS:
7157 case X86_64_SSE_CLASS:
7158 case X86_64_SSESF_CLASS:
7159 case X86_64_SSEDF_CLASS:
7162 case X86_64_NO_CLASS:
7163 case X86_64_SSEUP_CLASS:
7165 case X86_64_X87_CLASS:
7166 case X86_64_X87UP_CLASS:
7167 case X86_64_COMPLEX_X87_CLASS:
/* MEMORY_CLASS means the whole argument must live on the stack. */
7171 case X86_64_MEMORY_CLASS:
7178 /* Construct container for the argument used by GCC interface. See
7179 FUNCTION_ARG for the detailed description. */
/* NOTE(review): portions of this function are elided in this view.
   The visible code classifies the argument, diagnoses SSE/x87 use when
   those register files are disabled, handles single-register cases, and
   otherwise builds a PARALLEL describing the registers used. */
7182 construct_container (machine_mode mode, machine_mode orig_mode,
7183 const_tree type, int in_return, int nintregs, int nsseregs,
7184 const int *intreg, int sse_regno)
7186 /* The following variables hold the static issued_error state. */
/* static: each diagnostic below is emitted at most once per compilation. */
7187 static bool issued_sse_arg_error;
7188 static bool issued_sse_ret_error;
7189 static bool issued_x87_ret_error;
7191 machine_mode tmpmode;
7193 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7194 enum x86_64_reg_class regclass[MAX_CLASSES];
7198 int needed_sseregs, needed_intregs;
7199 rtx exp[MAX_CLASSES];
7202 n = classify_argument (mode, type, regclass, 0);
7205 if (examine_argument (mode, type, in_return, &needed_intregs,
/* Not enough free registers of the required kind: pass in memory. */
7208 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7211 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7212 some less clueful developer tries to use floating-point anyway. */
7213 if (needed_sseregs && !TARGET_SSE)
7217 if (!issued_sse_ret_error)
7219 error ("SSE register return with SSE disabled");
7220 issued_sse_ret_error = true;
7223 else if (!issued_sse_arg_error)
7225 error ("SSE register argument with SSE disabled");
7226 issued_sse_arg_error = true;
7231 /* Likewise, error if the ABI requires us to return values in the
7232 x87 registers and the user specified -mno-80387. */
7233 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7234 for (i = 0; i < n; i++)
7235 if (regclass[i] == X86_64_X87_CLASS
7236 || regclass[i] == X86_64_X87UP_CLASS
7237 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7239 if (!issued_x87_ret_error)
7241 error ("x87 register return with x87 disabled")
7242 issued_x87_ret_error = true;
7247 /* First construct simple cases. Avoid SCmode, since we want to use
7248 single register to pass this type. */
7249 if (n == 1 && mode != SCmode)
7250 switch (regclass[0])
7252 case X86_64_INTEGER_CLASS:
7253 case X86_64_INTEGERSI_CLASS:
7254 return gen_rtx_REG (mode, intreg[0]);
7255 case X86_64_SSE_CLASS:
7256 case X86_64_SSESF_CLASS:
7257 case X86_64_SSEDF_CLASS:
7258 if (mode != BLKmode)
7259 return gen_reg_or_parallel (mode, orig_mode,
7260 SSE_REGNO (sse_regno));
7262 case X86_64_X87_CLASS:
7263 case X86_64_COMPLEX_X87_CLASS:
7264 return gen_rtx_REG (mode, FIRST_STACK_REG);
7265 case X86_64_NO_CLASS:
7266 /* Zero sized array, struct or class. */
/* Two-eightbyte SSE value (e.g. a 128-bit vector): one SSE register. */
7272 && regclass[0] == X86_64_SSE_CLASS
7273 && regclass[1] == X86_64_SSEUP_CLASS
7275 return gen_reg_or_parallel (mode, orig_mode,
7276 SSE_REGNO (sse_regno));
/* Four-eightbyte SSE value (256-bit vector): one YMM register. */
7278 && regclass[0] == X86_64_SSE_CLASS
7279 && regclass[1] == X86_64_SSEUP_CLASS
7280 && regclass[2] == X86_64_SSEUP_CLASS
7281 && regclass[3] == X86_64_SSEUP_CLASS
7283 return gen_reg_or_parallel (mode, orig_mode,
7284 SSE_REGNO (sse_regno));
/* Eight-eightbyte SSE value (512-bit vector): one ZMM register. */
7286 && regclass[0] == X86_64_SSE_CLASS
7287 && regclass[1] == X86_64_SSEUP_CLASS
7288 && regclass[2] == X86_64_SSEUP_CLASS
7289 && regclass[3] == X86_64_SSEUP_CLASS
7290 && regclass[4] == X86_64_SSEUP_CLASS
7291 && regclass[5] == X86_64_SSEUP_CLASS
7292 && regclass[6] == X86_64_SSEUP_CLASS
7293 && regclass[7] == X86_64_SSEUP_CLASS
7295 return gen_reg_or_parallel (mode, orig_mode,
7296 SSE_REGNO (sse_regno));
/* XFmode long double: whole value on the x87 stack. */
7298 && regclass[0] == X86_64_X87_CLASS
7299 && regclass[1] == X86_64_X87UP_CLASS)
7300 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
/* Adjacent integer-register pair can hold a TImode/CDImode directly. */
7303 && regclass[0] == X86_64_INTEGER_CLASS
7304 && regclass[1] == X86_64_INTEGER_CLASS
7305 && (mode == CDImode || mode == TImode)
7306 && intreg[0] + 1 == intreg[1])
7307 return gen_rtx_REG (mode, intreg[0]);
7309 /* Otherwise figure out the entries of the PARALLEL. */
7310 for (i = 0; i < n; i++)
7314 switch (regclass[i])
7316 case X86_64_NO_CLASS:
7318 case X86_64_INTEGER_CLASS:
7319 case X86_64_INTEGERSI_CLASS:
7320 /* Merge TImodes on aligned occasions here too. */
7321 if (i * 8 + 8 > bytes)
7323 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7324 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7328 /* We've requested 24 bytes we
7329 don't have mode for. Use DImode. */
7330 if (tmpmode == BLKmode)
7333 = gen_rtx_EXPR_LIST (VOIDmode,
7334 gen_rtx_REG (tmpmode, *intreg),
7338 case X86_64_SSESF_CLASS:
7340 = gen_rtx_EXPR_LIST (VOIDmode,
7341 gen_rtx_REG (SFmode,
7342 SSE_REGNO (sse_regno)),
7346 case X86_64_SSEDF_CLASS:
7348 = gen_rtx_EXPR_LIST (VOIDmode,
7349 gen_rtx_REG (DFmode,
7350 SSE_REGNO (sse_regno)),
7354 case X86_64_SSE_CLASS:
/* Leading SSE eightbyte followed by SSEUP: a wide vector value. */
7362 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7372 && regclass[1] == X86_64_SSEUP_CLASS
7373 && regclass[2] == X86_64_SSEUP_CLASS
7374 && regclass[3] == X86_64_SSEUP_CLASS);
7380 && regclass[1] == X86_64_SSEUP_CLASS
7381 && regclass[2] == X86_64_SSEUP_CLASS
7382 && regclass[3] == X86_64_SSEUP_CLASS
7383 && regclass[4] == X86_64_SSEUP_CLASS
7384 && regclass[5] == X86_64_SSEUP_CLASS
7385 && regclass[6] == X86_64_SSEUP_CLASS
7386 && regclass[7] == X86_64_SSEUP_CLASS);
7394 = gen_rtx_EXPR_LIST (VOIDmode,
7395 gen_rtx_REG (tmpmode,
7396 SSE_REGNO (sse_regno)),
7405 /* Empty aligned struct, union or class. */
/* Assemble the collected EXPR_LIST entries into the final PARALLEL. */
7409 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7410 for (i = 0; i < nexps; i++)
7411 XVECEXP (ret, 0, i) = exp [i];
7415 /* Update the data in CUM to advance over an argument of mode MODE
7416 and data type TYPE. (TYPE is null for libcalls where that information
7417 may not be available.)
7419 Return a number of integer regsiters advanced over. */
/* NOTE(review): the dispatch (likely a switch over MODE) is elided in
   this view; the visible code updates the integer, SSE, and MMX
   register/word counters in CUM for the 32-bit ABI. */
7422 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7423 const_tree type, HOST_WIDE_INT bytes,
7424 HOST_WIDE_INT words)
/* Integer-register path: consume WORDS registers. */
7442 cum->words += words;
7443 cum->nregs -= words;
7444 cum->regno += words;
7445 if (cum->nregs >= 0)
7447 if (cum->nregs <= 0)
7455 /* OImode shouldn't be used directly. */
/* float_in_sse gates whether SFmode/DFmode scalars go in SSE regs. */
7459 if (cum->float_in_sse < 2)
7462 if (cum->float_in_sse < 1)
/* SSE path: aggregates are excluded; consume one SSE register. */
7485 if (!type || !AGGREGATE_TYPE_P (type))
7487 cum->sse_words += words;
7488 cum->sse_nregs -= 1;
7489 cum->sse_regno += 1;
7490 if (cum->sse_nregs <= 0)
/* MMX path: likewise one MMX register for non-aggregates. */
7504 if (!type || !AGGREGATE_TYPE_P (type))
7506 cum->mmx_words += words;
7507 cum->mmx_nregs -= 1;
7508 cum->mmx_regno += 1;
7509 if (cum->mmx_nregs <= 0)
/* Advance CUM over one argument under the 64-bit SysV ABI.  Registers are
   consumed only when the argument fits entirely in the remaining integer
   and SSE registers; otherwise it goes on the stack (aligned word count).
   NOTE(review): some lines are elided in this view. */
7522 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
7523 const_tree type, HOST_WIDE_INT words, bool named)
7525 int int_nregs, sse_nregs;
7527 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
7528 if (!named && (VALID_AVX512F_REG_MODE (mode)
7529 || VALID_AVX256_REG_MODE (mode)))
/* examine_argument returns false when the value is register-passable. */
7532 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7533 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7535 cum->nregs -= int_nregs;
7536 cum->sse_nregs -= sse_nregs;
7537 cum->regno += int_nregs;
7538 cum->sse_regno += sse_nregs;
/* Stack case: round the running word count up to the argument's
   alignment boundary, then add its size in words. */
7543 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7544 cum->words = (cum->words + align - 1) & ~(align - 1);
7545 cum->words += words;
/* Advance CUM over one argument under the Microsoft x64 ABI.  Values
   larger than 8 bytes (or of odd size) are passed indirectly, so only
   1/2/4/8-byte direct arguments reach this point.
   NOTE(review): some lines are elided in this view. */
7551 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7552 HOST_WIDE_INT words)
7554 /* Otherwise, this should be passed indirect. */
7555 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7557 cum->words += words;
7567 /* Update the data in CUM to advance over an argument of mode MODE and
7568 data type TYPE. (TYPE is null for libcalls where that information
7569 may not be available.) */
/* Target hook: dispatches to the ABI-specific advance routine and also
   maintains the MPX pointer-bounds bookkeeping (bnd_regno, bnds_in_bt,
   force_bnd_pass).  NOTE(review): some lines are elided in this view. */
7572 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7573 const_tree type, bool named)
7575 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7576 HOST_WIDE_INT bytes, words;
7579 if (mode == BLKmode)
7580 bytes = int_size_in_bytes (type);
7582 bytes = GET_MODE_SIZE (mode);
7583 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7586 mode = type_natural_mode (type, NULL, false);
/* Pointer-bounds arguments are accounted separately from regular ones. */
7588 if ((type && POINTER_BOUNDS_TYPE_P (type))
7589 || POINTER_BOUNDS_MODE_P (mode))
7591 /* If we pass bounds in BT then just update remained bounds count. */
7592 if (cum->bnds_in_bt)
7598 /* Update remained number of bounds to force. */
7599 if (cum->force_bnd_pass)
7600 cum->force_bnd_pass--;
7607 /* The first arg not going to Bounds Tables resets this counter. */
7608 cum->bnds_in_bt = 0;
7609 /* For unnamed args we always pass bounds to avoid bounds mess when
7610 passed and received types do not match. If bounds do not follow
7611 unnamed arg, still pretend required number of bounds were passed. */
7612 if (cum->force_bnd_pass)
7614 cum->bnd_regno += cum->force_bnd_pass;
7615 cum->force_bnd_pass = 0;
/* Select the ABI-specific worker: MS x64, SysV x86-64, or 32-bit. */
7618 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7619 nregs = function_arg_advance_ms_64 (cum, bytes, words);
7620 else if (TARGET_64BIT)
7621 nregs = function_arg_advance_64 (cum, mode, type, words, named);
7623 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
7625 /* For stdarg we expect bounds to be passed for each value passed
7628 cum->force_bnd_pass = nregs;
7629 /* For pointers passed in memory we expect bounds passed in Bounds
7632 cum->bnds_in_bt = chkp_type_bounds_count (type);
7635 /* Define where to put the arguments to a function.
7636 Value is zero to push the argument on the stack,
7637 or a hard register in which to store the argument.
7639 MODE is the argument's machine mode.
7640 TYPE is the data type of the argument (as a tree).
7641 This is null for libcalls where that information may
7643 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7644 the preceding args and about the function being called.
7645 NAMED is nonzero if this argument is a named parameter
7646 (otherwise it is an extra parameter matching an ellipsis). */
/* NOTE(review): the mode-dispatch structure is partly elided here; the
   visible code covers regparm integer registers, SSE and MMX vector
   registers for the 32-bit ABI. */
7649 function_arg_32 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7650 machine_mode orig_mode, const_tree type,
7651 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7653 /* Avoid the AL settings for the Unix64 ABI. */
7654 if (mode == VOIDmode)
/* Integer path: the argument fits in the remaining regparm registers. */
7670 if (words <= cum->nregs)
7672 int regno = cum->regno;
7674 /* Fastcall allocates the first two DWORD (SImode) or
7675 smaller arguments to ECX and EDX if it isn't an
7681 || (type && AGGREGATE_TYPE_P (type)))
7684 /* ECX not EAX is the first allocated register. */
7685 if (regno == AX_REG)
7688 return gen_rtx_REG (mode, regno)
7693 if (cum->float_in_sse < 2)
7696 if (cum->float_in_sse < 1)
7700 /* In 32bit, we pass TImode in xmm registers. */
7707 if (!type || !AGGREGATE_TYPE_P (type))
7710 return gen_reg_or_parallel (mode, orig_mode,
7711 cum->sse_regno + FIRST_SSE_REG);
7717 /* OImode and XImode shouldn't be used directly. */
/* Wider vector modes: also returned in the next SSE register. */
7732 if (!type || !AGGREGATE_TYPE_P (type))
7735 return gen_reg_or_parallel (mode, orig_mode,
7736 cum->sse_regno + FIRST_SSE_REG);
/* MMX-sized vectors go in MMX registers. */
7746 if (!type || !AGGREGATE_TYPE_P (type))
7749 return gen_reg_or_parallel (mode, orig_mode,
7750 cum->mmx_regno + FIRST_MMX_REG);
/* Return the register (or PARALLEL) in which to pass an argument under
   the 64-bit SysV ABI, or presumably NULL for stack passing — some
   lines are elided in this view. */
7759 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7760 machine_mode orig_mode, const_tree type, bool named)
7762 /* Handle a hidden AL argument containing number of registers
7763 for varargs x86-64 functions. */
7764 if (mode == VOIDmode)
7765 return GEN_INT (cum->maybe_vaarg
7766 ? (cum->sse_nregs < 0
7767 ? X86_64_SSE_REGPARM_MAX
7788 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
/* Delegate register layout to construct_container (0 = not a return). */
7794 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7796 &x86_64_int_parameter_registers [cum->regno],
/* Return the register in which to pass an argument under the Microsoft
   x64 ABI.  NOTE(review): some lines are elided in this view. */
7801 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7802 machine_mode orig_mode, bool named,
7803 HOST_WIDE_INT bytes)
7807 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
7808 We use value of -2 to specify that current function call is MSABI. */
7809 if (mode == VOIDmode)
7810 return GEN_INT (-2);
7812 /* If we've run out of registers, it goes on the stack. */
7813 if (cum->nregs == 0)
7816 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7818 /* Only floating point modes are passed in anything but integer regs. */
7819 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7822 regno = cum->regno + FIRST_SSE_REG;
7827 /* Unnamed floating parameters are passed in both the
7828 SSE and integer registers. */
/* Build a two-entry PARALLEL so the value is available in both files. */
7829 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7830 t2 = gen_rtx_REG (mode, regno);
7831 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7832 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7833 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7836 /* Handle aggregated types passed in register. */
/* BLKmode aggregates of 1..8 bytes are retyped to SImode/DImode. */
7837 if (orig_mode == BLKmode)
7839 if (bytes > 0 && bytes <= 8)
7840 mode = (bytes > 4 ? DImode : SImode);
7841 if (mode == BLKmode)
7845 return gen_reg_or_parallel (mode, orig_mode, regno);
7848 /* Return where to put the arguments to a function.
7849 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7851 MODE is the argument's machine mode. TYPE is the data type of the
7852 argument. It is null for libcalls where that information may not be
7853 available. CUM gives information about the preceding args and about
7854 the function being called. NAMED is nonzero if this argument is a
7855 named parameter (otherwise it is an extra parameter matching an
/* Target hook: handles pointer-bounds arguments itself, then dispatches
   to the ABI-specific worker.  NOTE(review): some lines are elided. */
7859 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
7860 const_tree type, bool named)
7862 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7863 machine_mode mode = omode;
7864 HOST_WIDE_INT bytes, words;
7867 /* All pointer bounds argumntas are handled separately here. */
7868 if ((type && POINTER_BOUNDS_TYPE_P (type))
7869 || POINTER_BOUNDS_MODE_P (mode))
7871 /* Return NULL if bounds are forced to go in Bounds Table. */
7872 if (cum->bnds_in_bt)
7874 /* Return the next available bound reg if any. */
7875 else if (cum->bnd_regno <= LAST_BND_REG)
7876 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
7877 /* Return the next special slot number otherwise. */
7879 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
7884 if (mode == BLKmode)
7885 bytes = int_size_in_bytes (type);
7887 bytes = GET_MODE_SIZE (mode);
7888 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7890 /* To simplify the code below, represent vector types with a vector mode
7891 even if MMX/SSE are not active. */
7892 if (type && TREE_CODE (type) == VECTOR_TYPE)
7893 mode = type_natural_mode (type, cum, false);
/* Dispatch on ABI: MS x64, SysV x86-64, or the 32-bit conventions. */
7895 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7896 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7897 else if (TARGET_64BIT)
7898 arg = function_arg_64 (cum, mode, omode, type, named);
7900 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7905 /* A C expression that indicates when an argument must be passed by
7906 reference. If nonzero for an argument, a copy of that argument is
7907 made in memory and a pointer to the argument is passed instead of
7908 the argument itself. The pointer is passed in whatever way is
7909 appropriate for passing a pointer to that type. */
/* NOTE(review): some lines are elided in this view. */
7912 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
7913 const_tree type, bool)
7915 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7917 /* Bounds are never passed by reference. */
7918 if ((type && POINTER_BOUNDS_TYPE_P (type))
7919 || POINTER_BOUNDS_MODE_P (mode))
7922 /* See Windows x64 Software Convention. */
7923 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7925 int msize = (int) GET_MODE_SIZE (mode);
7928 /* Arrays are passed by reference. */
7929 if (TREE_CODE (type) == ARRAY_TYPE)
7932 if (AGGREGATE_TYPE_P (type))
7934 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7935 are passed by reference. */
7936 msize = int_size_in_bytes (type);
7940 /* __m128 is passed by reference. */
/* Only power-of-two sizes up to 8 bytes are passed directly on MS x64. */
7942 case 1: case 2: case 4: case 8:
/* SysV x86-64: variable-sized types (size == -1) go by reference. */
7948 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7954 /* Return true when TYPE should be 128bit aligned for 32bit argument
7955 passing ABI. XXX: This function is obsolete and is only used for
7956 checking psABI compatibility with previous versions of GCC. */
/* NOTE(review): some lines are elided in this view. */
7959 ix86_compat_aligned_value_p (const_tree type)
7961 machine_mode mode = TYPE_MODE (type);
7962 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7966 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
/* Types aligned below 128 bits never need the extra alignment. */
7968 if (TYPE_ALIGN (type) < 128)
7971 if (AGGREGATE_TYPE_P (type))
7973 /* Walk the aggregates recursively. */
7974 switch (TREE_CODE (type))
7978 case QUAL_UNION_TYPE:
7982 /* Walk all the structure fields. */
/* Recurse into each field; one 128-bit-aligned field is sufficient. */
7983 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7985 if (TREE_CODE (field) == FIELD_DECL
7986 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7993 /* Just for use if some languages passes arrays by value. */
7994 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
8005 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
8006 XXX: This function is obsolete and is only used for checking psABI
8007 compatibility with previous versions of GCC. */
/* NOTE(review): some lines are elided in this view. */
8010 ix86_compat_function_arg_boundary (machine_mode mode,
8011 const_tree type, unsigned int align)
8013 /* In 32bit, only _Decimal128 and __float128 are aligned to their
8014 natural boundaries. */
8015 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
8017 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
8018 make an exception for SSE modes since these require 128bit
8021 The handling here differs from field_alignment. ICC aligns MMX
8022 arguments to 4 byte boundaries, while structure fields are aligned
8023 to 8 byte boundaries. */
8026 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
8027 align = PARM_BOUNDARY;
8031 if (!ix86_compat_aligned_value_p (type))
8032 align = PARM_BOUNDARY;
/* Never report more than the platform's maximum alignment. */
8035 if (align > BIGGEST_ALIGNMENT)
8036 align = BIGGEST_ALIGNMENT;
8040 /* Return true when TYPE should be 128bit aligned for 32bit argument
/* NOTE(review): some lines are elided in this view; mirrors the
   recursive structure of ix86_compat_aligned_value_p above. */
8044 ix86_contains_aligned_value_p (const_tree type)
8046 machine_mode mode = TYPE_MODE (type);
/* XFmode/XCmode (80-bit long double) never force 128-bit alignment. */
8048 if (mode == XFmode || mode == XCmode)
8051 if (TYPE_ALIGN (type) < 128)
8054 if (AGGREGATE_TYPE_P (type))
8056 /* Walk the aggregates recursively. */
8057 switch (TREE_CODE (type))
8061 case QUAL_UNION_TYPE:
8065 /* Walk all the structure fields. */
8066 for (field = TYPE_FIELDS (type);
8068 field = DECL_CHAIN (field))
8070 if (TREE_CODE (field) == FIELD_DECL
8071 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
8078 /* Just for use if some languages passes arrays by value. */
8079 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
/* Scalar case: alignment of the type itself decides. */
8088 return TYPE_ALIGN (type) >= 128;
8093 /* Gives the alignment boundary, in bits, of an argument with the
8094 specified mode and type. */
/* NOTE(review): some lines are elided in this view; also emits the
   GCC 4.6 psABI-change warning when old and new alignments differ. */
8097 ix86_function_arg_boundary (machine_mode mode, const_tree type)
8102 /* Since the main variant type is used for call, we convert it to
8103 the main variant type. */
8104 type = TYPE_MAIN_VARIANT (type);
8105 align = TYPE_ALIGN (type);
/* No type available (libcall): fall back to the mode's alignment. */
8108 align = GET_MODE_ALIGNMENT (mode);
8109 if (align < PARM_BOUNDARY)
8110 align = PARM_BOUNDARY;
8114 unsigned int saved_align = align;
8118 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
8121 if (mode == XFmode || mode == XCmode)
8122 align = PARM_BOUNDARY;
8124 else if (!ix86_contains_aligned_value_p (type))
8125 align = PARM_BOUNDARY;
8128 align = PARM_BOUNDARY;
/* Warn (once per location) when the pre-4.6 compat computation gives a
   different answer, i.e. the ABI changed for this argument. */
8133 && align != ix86_compat_function_arg_boundary (mode, type,
8137 inform (input_location,
8138 "The ABI for passing parameters with %d-byte"
8139 " alignment has changed in GCC 4.6",
8140 align / BITS_PER_UNIT);
8147 /* Return true if N is a possible register number of function value. */
/* NOTE(review): the switch/case structure over REGNO is elided in this
   view; visible arms cover integer, x87, SSE and bound registers. */
8150 ix86_function_value_regno_p (const unsigned int regno)
/* Valid except under the MS x64 ABI. */
8157 return (!TARGET_64BIT || ix86_abi != MS_ABI);
8160 return TARGET_64BIT && ix86_abi != MS_ABI;
/* Bound registers return values only in instrumented (MPX) functions. */
8163 return chkp_function_instrumented_p (current_function_decl);
8165 /* Complex values are returned in %st(0)/%st(1) pair. */
8168 /* TODO: The function should depend on current function ABI but
8169 builtins.c would need updating then. Therefore we use the
8171 if (TARGET_64BIT && ix86_abi == MS_ABI)
8173 return TARGET_FLOAT_RETURNS_IN_80387;
8175 /* Complex values are returned in %xmm0/%xmm1 pair. */
8181 if (TARGET_MACHO || TARGET_64BIT)
8189 /* Define how to find the value returned by a function.
8190 VALTYPE is the data type of the value (as a tree).
8191 If the precise function being called is known, FUNC is its FUNCTION_DECL;
8192 otherwise, FUNC is 0. */
/* 32-bit return-value register selection.
   NOTE(review): some lines are elided in this view. */
8195 function_value_32 (machine_mode orig_mode, machine_mode mode,
8196 const_tree fntype, const_tree fn)
8200 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
8201 we normally prevent this case when mmx is not available. However
8202 some ABIs may require the result to be returned like DImode. */
8203 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
8204 regno = FIRST_MMX_REG;
8206 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
8207 we prevent this case when sse is not available. However some ABIs
8208 may require the result to be returned like integer TImode. */
8209 else if (mode == TImode
8210 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
8211 regno = FIRST_SSE_REG;
8213 /* 32-byte vector modes in %ymm0. */
8214 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
8215 regno = FIRST_SSE_REG;
8217 /* 64-byte vector modes in %zmm0. */
8218 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
8219 regno = FIRST_SSE_REG;
8221 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
8222 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
8223 regno = FIRST_FLOAT_REG;
8225 /* Most things go in %eax. */
8228 /* Override FP return register with %xmm0 for local functions when
8229 SSE math is enabled or for functions with sseregparm attribute. */
8230 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
8232 int sse_level = ix86_function_sseregparm (fntype, fn, false);
8233 if ((sse_level >= 1 && mode == SFmode)
8234 || (sse_level == 2 && mode == DFmode))
8235 regno = FIRST_SSE_REG;
8238 /* OImode shouldn't be used directly. */
8239 gcc_assert (mode != OImode);
8241 return gen_rtx_REG (orig_mode, regno);
/* 64-bit SysV return-value selection.  For typed values the layout is
   delegated to construct_container; libcalls (no type node) pick the
   register from the mode directly.
   NOTE(review): some lines are elided in this view. */
8245 function_value_64 (machine_mode orig_mode, machine_mode mode,
8250 /* Handle libcalls, which don't provide a type node. */
8251 if (valtype == NULL)
8265 regno = FIRST_SSE_REG;
8269 regno = FIRST_FLOAT_REG;
8277 return gen_rtx_REG (mode, regno);
8279 else if (POINTER_TYPE_P (valtype))
8281 /* Pointers are always returned in word_mode. */
/* 1 = return position; use the return-register tables. */
8285 ret = construct_container (mode, orig_mode, valtype, 1,
8286 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
8287 x86_64_int_return_registers, 0);
8289 /* For zero sized structures, construct_container returns NULL, but we
8290 need to keep rest of compiler happy by returning meaningful value. */
8292 ret = gen_rtx_REG (orig_mode, AX_REG);
/* Microsoft x64 return-value selection: %rax by default, %xmm0 for
   suitable 16-byte vectors and scalar floats.
   NOTE(review): some lines are elided in this view; the duplicated
   VECTOR_INTEGER_TYPE_P test below looks like one of the two was meant
   to be a different predicate — verify against upstream. */
8298 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
8301 unsigned int regno = AX_REG;
8305 switch (GET_MODE_SIZE (mode))
8308 if (valtype != NULL_TREE
8309 && !VECTOR_INTEGER_TYPE_P (valtype)
8310 && !VECTOR_INTEGER_TYPE_P (valtype)
8311 && !INTEGRAL_TYPE_P (valtype)
8312 && !VECTOR_FLOAT_TYPE_P (valtype))
8314 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8315 && !COMPLEX_MODE_P (mode))
8316 regno = FIRST_SSE_REG;
8320 if (mode == SFmode || mode == DFmode)
8321 regno = FIRST_SSE_REG;
8327 return gen_rtx_REG (orig_mode, regno);
/* Common worker for ix86_function_value and ix86_libcall_value:
   normalizes FNTYPE_OR_DECL into decl/type and dispatches to the
   ABI-specific return-value routine.
   NOTE(review): some lines are elided in this view. */
8331 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8332 machine_mode orig_mode, machine_mode mode)
8334 const_tree fn, fntype;
8337 if (fntype_or_decl && DECL_P (fntype_or_decl))
8338 fn = fntype_or_decl;
8339 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
/* Pointer-bounds values always come back in the first bound register. */
8341 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
8342 || POINTER_BOUNDS_MODE_P (mode))
8343 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
8344 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8345 return function_value_ms_64 (orig_mode, mode, valtype);
8346 else if (TARGET_64BIT)
8347 return function_value_64 (orig_mode, mode, valtype);
8349 return function_value_32 (orig_mode, mode, fntype, fn);
/* Target hook TARGET_FUNCTION_VALUE: compute the natural mode of the
   returned value and delegate to the common worker. */
8353 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8355 machine_mode mode, orig_mode;
8357 orig_mode = TYPE_MODE (valtype);
8358 mode = type_natural_mode (valtype, NULL, true);
8359 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8362 /* Return an RTX representing a place where a function returns
8363 or recieves pointer bounds or NULL if no bounds are returned.
8365 VALTYPE is a data type of a value returned by the function.
8367 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
8368 or FUNCTION_TYPE of the function.
8370 If OUTGOING is false, return a place in which the caller will
8371 see the return value. Otherwise, return a place where a
8372 function returns a value. */
/* NOTE(review): some lines are elided in this view. */
8375 ix86_function_value_bounds (const_tree valtype,
8376 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
8377 bool outgoing ATTRIBUTE_UNUSED)
/* A directly bounded type uses the first bound register outright. */
8381 if (BOUNDED_TYPE_P (valtype))
8382 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
8383 else if (chkp_type_has_pointer (valtype))
8388 unsigned i, bnd_no = 0;
/* Collect the pointer slots of the aggregate into a bitmap, then map
   each slot to a bound register paired with its byte offset. */
8390 bitmap_obstack_initialize (NULL);
8391 slots = BITMAP_ALLOC (NULL);
8392 chkp_find_bound_slots (valtype, slots);
8394 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
8396 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
8397 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
8398 gcc_assert (bnd_no < 2);
8399 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
8402 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
8404 BITMAP_FREE (slots);
8405 bitmap_obstack_release (NULL);
8413 /* Pointer function arguments and return values are promoted to
/* Target hook TARGET_PROMOTE_FUNCTION_MODE: pointers extend unsigned;
   everything else uses the default promotion rules. */
8417 ix86_promote_function_mode (const_tree type, machine_mode mode,
8418 int *punsignedp, const_tree fntype,
8421 if (type != NULL_TREE && POINTER_TYPE_P (type))
8423 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8426 return default_promote_function_mode (type, mode, punsignedp, fntype,
8430 /* Return true if a structure, union or array with MODE containing FIELD
8431 should be accessed using BLKmode. */
8434 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
8436 /* Union with XFmode must be in BLKmode. */
8437 return (mode == XFmode
8438 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8439 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
/* Target hook TARGET_LIBCALL_VALUE: libcalls have no type node, so the
   mode alone drives the return-register choice. */
8443 ix86_libcall_value (machine_mode mode)
8445 return ix86_function_value_1 (NULL, NULL, mode, mode);
8448 /* Return true iff type is returned in memory. */
/* NOTE(review): some lines are elided in this view; covers MS x64,
   SysV x86-64, and the 32-bit conventions in turn. */
8451 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8453 #ifdef SUBTARGET_RETURN_IN_MEMORY
8454 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8456 const machine_mode mode = type_natural_mode (type, NULL, true);
/* Bounds are returned in bound registers, never in memory. */
8459 if (POINTER_BOUNDS_TYPE_P (type))
8464 if (ix86_function_type_abi (fntype) == MS_ABI)
8466 size = int_size_in_bytes (type);
8468 /* __m128 is returned in xmm0. */
8469 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8470 || INTEGRAL_TYPE_P (type)
8471 || VECTOR_FLOAT_TYPE_P (type))
8472 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8473 && !COMPLEX_MODE_P (mode)
8474 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8477 /* Otherwise, the size must be exactly in [1248]. */
8478 return size != 1 && size != 2 && size != 4 && size != 8;
/* SysV x86-64: memory iff classification finds a MEMORY class. */
8482 int needed_intregs, needed_sseregs;
8484 return examine_argument (mode, type, 1,
8485 &needed_intregs, &needed_sseregs);
8490 if (mode == BLKmode)
8493 size = int_size_in_bytes (type);
8495 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8498 if (VECTOR_MODE_P (mode) || mode == TImode)
8500 /* User-created vectors small enough to fit in EAX. */
8504 /* Unless ABI prescibes otherwise,
8505 MMX/3dNow values are returned in MM0 if available. */
8508 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8510 /* SSE values are returned in XMM0 if available. */
8514 /* AVX values are returned in YMM0 if available. */
8518 /* AVX512F values are returned in ZMM0 if available. */
8520 return !TARGET_AVX512F;
8529 /* OImode shouldn't be used directly. */
8530 gcc_assert (mode != OImode);
8538 /* Create the va_list data type.  */
8540 /* Returns the calling convention specific va_list data type.
8541 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI.  */
8544 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8546 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8548 /* For i386 we use plain pointer to argument area.  */
8549 if (!TARGET_64BIT || abi == MS_ABI)
8550 return build_pointer_type (char_type_node);
/* Otherwise build the four-field SysV AMD64 __va_list_tag record:
   gp_offset, fp_offset, overflow_arg_area, reg_save_area.  (The
   type lines of the two pointer fields are elided in this view.)  */
8552 record = lang_hooks.types.make_type (RECORD_TYPE);
8553 type_decl = build_decl (BUILTINS_LOCATION,
8554 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8556 f_gpr = build_decl (BUILTINS_LOCATION,
8557 FIELD_DECL, get_identifier ("gp_offset"),
8558 unsigned_type_node);
8559 f_fpr = build_decl (BUILTINS_LOCATION,
8560 FIELD_DECL, get_identifier ("fp_offset"),
8561 unsigned_type_node);
8562 f_ovf = build_decl (BUILTINS_LOCATION,
8563 FIELD_DECL, get_identifier ("overflow_arg_area"),
8565 f_sav = build_decl (BUILTINS_LOCATION,
8566 FIELD_DECL, get_identifier ("reg_save_area"),
/* Remember the counter fields so optimization passes can reason
   about how much of the register save area is actually used.  */
8569 va_list_gpr_counter_field = f_gpr;
8570 va_list_fpr_counter_field = f_fpr;
8572 DECL_FIELD_CONTEXT (f_gpr) = record;
8573 DECL_FIELD_CONTEXT (f_fpr) = record;
8574 DECL_FIELD_CONTEXT (f_ovf) = record;
8575 DECL_FIELD_CONTEXT (f_sav) = record;
8577 TYPE_STUB_DECL (record) = type_decl;
8578 TYPE_NAME (record) = type_decl;
/* Chain the fields in declaration order and lay out the record.  */
8579 TYPE_FIELDS (record) = f_gpr;
8580 DECL_CHAIN (f_gpr) = f_fpr;
8581 DECL_CHAIN (f_fpr) = f_ovf;
8582 DECL_CHAIN (f_ovf) = f_sav;
8584 layout_type (record);
8586 /* The correct type is an array type of one element.  */
8587 return build_array_type (record, build_index_type (size_zero_node));
8590 /* Setup the builtin va_list data type and for 64-bit the additional
8591 calling convention specific va_list data types.  */
8594 ix86_build_builtin_va_list (void)
8596 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8598 /* Initialize abi specific va_list builtin types.
   NOTE(review): the surrounding conditionals (e.g. TARGET_64BIT
   checks and else-branches) are elided in this excerpt; the
   duplicated assignments below belong to different branches of that
   elided control flow.  */
8602 if (ix86_abi == MS_ABI)
8604 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
/* Presumably the variant copy keeps the SysV va_list type distinct
   from the default one -- TODO confirm against full source.  */
8605 if (TREE_CODE (t) != RECORD_TYPE)
8606 t = build_variant_type_copy (t);
8607 sysv_va_list_type_node = t;
8612 if (TREE_CODE (t) != RECORD_TYPE)
8613 t = build_variant_type_copy (t);
8614 sysv_va_list_type_node = t;
8616 if (ix86_abi != MS_ABI)
8618 t = ix86_build_builtin_va_list_abi (MS_ABI);
8619 if (TREE_CODE (t) != RECORD_TYPE)
8620 t = build_variant_type_copy (t);
8621 ms_va_list_type_node = t;
8626 if (TREE_CODE (t) != RECORD_TYPE)
8627 t = build_variant_type_copy (t);
8628 ms_va_list_type_node = t;
8635 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.  Spills the
   unnamed incoming GP and SSE argument registers into the register
   save area so va_arg can later fetch them.  */
8638 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8644 /* GPR size of varargs save area.  */
8645 if (cfun->va_list_gpr_size)
8646 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8648 ix86_varargs_gpr_size = 0;
8650 /* FPR size of varargs save area.  We don't need it if we don't pass
8651 anything in SSE registers.  */
8652 if (TARGET_SSE && cfun->va_list_fpr_size)
8653 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8655 ix86_varargs_fpr_size = 0;
/* Nothing to save at all -- bail out early.  */
8657 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8660 save_area = frame_pointer_rtx;
8661 set = get_varargs_alias_set ();
/* Save only the GP registers from the first unnamed one (cum->regno)
   up to what va_list_gpr_size says is needed, capped at the ABI max.  */
8663 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8664 if (max > X86_64_REGPARM_MAX)
8665 max = X86_64_REGPARM_MAX;
8667 for (i = cum->regno; i < max; i++)
8669 mem = gen_rtx_MEM (word_mode,
8670 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8671 MEM_NOTRAP_P (mem) = 1;
8672 set_mem_alias_set (mem, set);
8673 emit_move_insn (mem,
8674 gen_rtx_REG (word_mode,
8675 x86_64_int_parameter_registers[i]));
8678 if (ix86_varargs_fpr_size)
8681 rtx_code_label *label;
8684 /* Now emit code to save SSE registers.  The AX parameter contains number
8685 of SSE parameter registers used to call this function, though all we
8686 actually check here is the zero/non-zero status.  */
8688 label = gen_label_rtx ();
/* Skip the SSE spills entirely when AL == 0 (no SSE args passed).  */
8689 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8690 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8693 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8694 we used movdqa (i.e. TImode) instead?  Perhaps even better would
8695 be if we could determine the real mode of the data, via a hook
8696 into pass_stdarg.  Ignore all that for now.  */
/* Make sure the stack is aligned enough for the store mode chosen
   above (the smode computation itself is elided in this view).  */
8698 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8699 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8701 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8702 if (max > X86_64_SSE_REGPARM_MAX)
8703 max = X86_64_SSE_REGPARM_MAX;
/* SSE save slots start right after the GP save area, 16 bytes each.  */
8705 for (i = cum->sse_regno; i < max; ++i)
8707 mem = plus_constant (Pmode, save_area,
8708 i * 16 + ix86_varargs_gpr_size);
8709 mem = gen_rtx_MEM (smode, mem);
8710 MEM_NOTRAP_P (mem) = 1;
8711 set_mem_alias_set (mem, set);
8712 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8714 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
/* MS-ABI variant of the varargs setup: spill the remaining unnamed
   integer parameter registers into the caller-allocated shadow space
   above the incoming arguments.  */
8722 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8724 alias_set_type set = get_varargs_alias_set ();
8727 /* Reset to zero, as there might be a sysv vaarg used
8729 ix86_varargs_gpr_size = 0;
8730 ix86_varargs_fpr_size = 0;
/* Each unnamed register goes to its home slot in the argument area.  */
8732 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8736 mem = gen_rtx_MEM (Pmode,
8737 plus_constant (Pmode, virtual_incoming_args_rtx,
8738 i * UNITS_PER_WORD));
8739 MEM_NOTRAP_P (mem) = 1;
8740 set_mem_alias_set (mem, set);
8742 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8743 emit_move_insn (mem, reg);
/* Implement TARGET_SETUP_INCOMING_VARARGS: advance past the named
   arguments, then dispatch to the ABI-specific worker.  */
8748 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
8749 tree type, int *, int no_rtl)
8751 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8752 CUMULATIVE_ARGS next_cum;
8755 /* This argument doesn't appear to be used anymore.  Which is good,
8756 because the old code here didn't suppress rtl generation.  */
8757 gcc_assert (!no_rtl);
8762 fntype = TREE_TYPE (current_function_decl);
8764 /* For varargs, we do not want to skip the dummy va_dcl argument.
8765 For stdargs, we do want to skip the last named argument.  */
/* next_cum is a copy advanced past the last named arg (copy setup
   elided in this view); cum itself is left untouched.  */
8767 if (stdarg_p (fntype))
8768 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8771 if (cum->call_abi == MS_ABI)
8772 setup_incoming_varargs_ms_64 (&next_cum);
8774 setup_incoming_varargs_64 (&next_cum);
/* MPX counterpart of the varargs setup: store pointer bounds for the
   unnamed integer argument registers next to their save-area slots,
   so instrumented va_arg can retrieve them.  NOTE(review): several
   lines (copy of *cum into next_cum, inner braces) are elided.  */
8778 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
8779 enum machine_mode mode,
8781 int *pretend_size ATTRIBUTE_UNUSED,
8784 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8785 CUMULATIVE_ARGS next_cum;
8788 int bnd_reg, i, max;
8790 gcc_assert (!no_rtl);
8792 /* Do nothing if we use plain pointer to argument area.  */
8793 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
8796 fntype = TREE_TYPE (current_function_decl);
8798 /* For varargs, we do not want to skip the dummy va_dcl argument.
8799 For stdargs, we do want to skip the last named argument.  */
8801 if (stdarg_p (fntype))
8802 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8804 save_area = frame_pointer_rtx;
/* Mirror the GP-register range used by setup_incoming_varargs_64.  */
8806 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8807 if (max > X86_64_REGPARM_MAX)
8808 max = X86_64_REGPARM_MAX;
8810 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
8811 if (chkp_function_instrumented_p (current_function_decl))
8812 for (i = cum->regno; i < max; i++)
8814 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
8815 rtx reg = gen_rtx_REG (DImode,
8816 x86_64_int_parameter_registers[i]);
/* Bounds for the first args arrive in BND registers; once those run
   out they are loaded from the Bounds Table via BNDLDX.  */
8820 if (bnd_reg <= LAST_BND_REG)
8821 bounds = gen_rtx_REG (BNDmode, bnd_reg);
8825 plus_constant (Pmode, arg_pointer_rtx,
8826 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
8827 bounds = gen_reg_rtx (BNDmode);
8828 emit_insn (BNDmode == BND64mode
8829 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
8830 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
/* Store the bounds for this register's save slot.  */
8833 emit_insn (BNDmode == BND64mode
8834 ? gen_bnd64_stx (addr, ptr, bounds)
8835 : gen_bnd32_stx (addr, ptr, bounds));
8842 /* Checks if TYPE is of kind va_list char *.  */
8845 is_va_list_char_pointer (tree type)
8849 /* For 32-bit it is always true.  */
/* On 64-bit, the char* form is used by the MS ABI va_list and, when
   compiling for the MS ABI, by the default va_list as well.  */
8852 canonic = ix86_canonical_va_list_type (type);
8853 return (canonic == ms_va_list_type_node
8854 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8857 /* Implement va_start.  Initializes the four va_list fields
   (gp_offset, fp_offset, overflow_arg_area, reg_save_area) for the
   64-bit SysV ABI, or falls back to the generic pointer-bump
   expansion for 32-bit / MS-ABI char* va_lists.  */
8860 ix86_va_start (tree valist, rtx nextarg)
8862 HOST_WIDE_INT words, n_gpr, n_fpr;
8863 tree f_gpr, f_fpr, f_ovf, f_sav;
8864 tree gpr, fpr, ovf, sav, t;
8868 if (flag_split_stack
8869 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8871 unsigned int scratch_regno;
8873 /* When we are splitting the stack, we can't refer to the stack
8874 arguments using internal_arg_pointer, because they may be on
8875 the old stack.  The split stack prologue will arrange to
8876 leave a pointer to the old stack arguments in a scratch
8877 register, which we here copy to a pseudo-register.  The split
8878 stack prologue can't set the pseudo-register directly because
8879 it (the prologue) runs before any registers have been saved.  */
8881 scratch_regno = split_stack_prologue_scratch_regno ();
8882 if (scratch_regno != INVALID_REGNUM)
8887 reg = gen_reg_rtx (Pmode);
8888 cfun->machine->split_stack_varargs_pointer = reg;
8891 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
/* The copy must execute at function entry, before any use; splice
   the emitted sequence right after the entry point.  */
8895 push_topmost_sequence ();
8896 emit_insn_after (seq, entry_of_function ());
8897 pop_topmost_sequence ();
8901 /* Only 64bit target needs something special.  */
8902 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8904 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8905 std_expand_builtin_va_start (valist, nextarg)
8910 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8911 next = expand_binop (ptr_mode, add_optab,
8912 cfun->machine->split_stack_varargs_pointer,
8913 crtl->args.arg_offset_rtx,
8914 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8915 convert_move (va_r, next, 0);
8917 /* Store zero bounds for va_list.  */
8918 if (chkp_function_instrumented_p (current_function_decl))
8919 chkp_expand_bounds_reset_for_mem (valist,
8920 make_tree (TREE_TYPE (valist),
/* 64-bit SysV path: locate the four fields of __va_list_tag.  */
8927 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8928 f_fpr = DECL_CHAIN (f_gpr);
8929 f_ovf = DECL_CHAIN (f_fpr);
8930 f_sav = DECL_CHAIN (f_ovf);
8932 valist = build_simple_mem_ref (valist);
8933 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8934 /* The following should be folded into the MEM_REF offset.  */
8935 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8937 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8939 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8941 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8944 /* Count number of gp and fp argument registers used.  */
8945 words = crtl->args.info.words;
8946 n_gpr = crtl->args.info.regno;
8947 n_fpr = crtl->args.info.sse_regno;
8949 if (cfun->va_list_gpr_size)
/* gp_offset = bytes of GP save area already consumed by named args.  */
8951 type = TREE_TYPE (gpr);
8952 t = build2 (MODIFY_EXPR, type,
8953 gpr, build_int_cst (type, n_gpr * 8));
8954 TREE_SIDE_EFFECTS (t) = 1;
8955 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8958 if (TARGET_SSE && cfun->va_list_fpr_size)
/* fp_offset starts past the whole GP area (8 * REGPARM_MAX bytes).  */
8960 type = TREE_TYPE (fpr);
8961 t = build2 (MODIFY_EXPR, type, fpr,
8962 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8963 TREE_SIDE_EFFECTS (t) = 1;
8964 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8967 /* Find the overflow area.  */
8968 type = TREE_TYPE (ovf);
8969 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8970 ovf_rtx = crtl->args.internal_arg_pointer;
8972 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8973 t = make_tree (type, ovf_rtx);
8975 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8977 /* Store zero bounds for overflow area pointer.  */
8978 if (chkp_function_instrumented_p (current_function_decl))
8979 chkp_expand_bounds_reset_for_mem (ovf, t);
8981 t = build2 (MODIFY_EXPR, type, ovf, t);
8982 TREE_SIDE_EFFECTS (t) = 1;
8983 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8985 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8987 /* Find the register save area.
8988 Prologue of the function save it right above stack frame.  */
8989 type = TREE_TYPE (sav);
8990 t = make_tree (type, frame_pointer_rtx);
/* With no GP area saved, bias the pointer back so fp_offset (which
   assumes a full GP area below it) still lands on the SSE slots.  */
8991 if (!ix86_varargs_gpr_size)
8992 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8994 /* Store zero bounds for save area pointer.  */
8995 if (chkp_function_instrumented_p (current_function_decl))
8996 chkp_expand_bounds_reset_for_mem (sav, t);
8998 t = build2 (MODIFY_EXPR, type, sav, t);
8999 TREE_SIDE_EFFECTS (t) = 1;
9000 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9004 /* Implement va_arg.  Emits GIMPLE that fetches the next argument of
   TYPE either from the register save area (fast path) or from the
   overflow (stack) area, following the SysV AMD64 classification.
   NOTE(review): many lines are elided in this excerpt -- braces,
   else-branches and a few declarations -- so the visible statements
   span several nested scopes.  */
9007 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
9010 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
9011 tree f_gpr, f_fpr, f_ovf, f_sav;
9012 tree gpr, fpr, ovf, sav, t;
9014 tree lab_false, lab_over = NULL_TREE;
9019 machine_mode nat_mode;
9020 unsigned int arg_boundary;
9022 /* Only 64bit target needs something special.  */
9023 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
9024 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
9026 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9027 f_fpr = DECL_CHAIN (f_gpr);
9028 f_ovf = DECL_CHAIN (f_fpr);
9029 f_sav = DECL_CHAIN (f_ovf);
9031 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
9032 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
9033 valist = build_va_arg_indirect_ref (valist);
9034 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9035 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9036 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
/* Arguments passed by reference are fetched as a pointer to TYPE.  */
9038 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
9040 type = build_pointer_type (type);
9041 size = int_size_in_bytes (type);
9042 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
9044 nat_mode = type_natural_mode (type, NULL, false);
9059 /* Unnamed 256 and 512bit vector mode parameters are passed on stack.  */
9060 if (!TARGET_64BIT_MS_ABI)
/* container describes which registers (if any) hold the argument;
   NULL means it lives entirely in the overflow area.  */
9067 container = construct_container (nat_mode, TYPE_MODE (type),
9068 type, 0, X86_64_REGPARM_MAX,
9069 X86_64_SSE_REGPARM_MAX, intreg,
9074 /* Pull the value out of the saved registers.  */
9076 addr = create_tmp_var (ptr_type_node, "addr");
9080 int needed_intregs, needed_sseregs;
9082 tree int_addr, sse_addr;
9084 lab_false = create_artificial_label (UNKNOWN_LOCATION);
9085 lab_over = create_artificial_label (UNKNOWN_LOCATION);
9087 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
/* A temporary copy is required when the save-area slots are not a
   contiguous, correctly-aligned image of the value.  */
9089 need_temp = (!REG_P (container)
9090 && ((needed_intregs && TYPE_ALIGN (type) > 64)
9091 || TYPE_ALIGN (type) > 128));
9093 /* In case we are passing structure, verify that it is consecutive block
9094 on the register save area.  If not we need to do moves.  */
9095 if (!need_temp && !REG_P (container))
9097 /* Verify that all registers are strictly consecutive */
9098 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
9102 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9104 rtx slot = XVECEXP (container, 0, i);
9105 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
9106 || INTVAL (XEXP (slot, 1)) != i * 16)
9114 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9116 rtx slot = XVECEXP (container, 0, i);
9117 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
9118 || INTVAL (XEXP (slot, 1)) != i * 8)
9130 int_addr = create_tmp_var (ptr_type_node, "int_addr");
9131 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
9134 /* First ensure that we fit completely in registers.  */
/* If gp_offset/fp_offset have advanced past the last slot that could
   still hold this argument, jump to the overflow-area path.  */
9137 t = build_int_cst (TREE_TYPE (gpr),
9138 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
9139 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
9140 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9141 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9142 gimplify_and_add (t, pre_p);
9146 t = build_int_cst (TREE_TYPE (fpr),
9147 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
9148 + X86_64_REGPARM_MAX * 8);
9149 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
9150 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9151 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9152 gimplify_and_add (t, pre_p);
9155 /* Compute index to start of area used for integer regs.  */
9158 /* int_addr = gpr + sav; */
9159 t = fold_build_pointer_plus (sav, gpr);
9160 gimplify_assign (int_addr, t, pre_p);
9164 /* sse_addr = fpr + sav; */
9165 t = fold_build_pointer_plus (sav, fpr);
9166 gimplify_assign (sse_addr, t, pre_p);
/* need_temp path: assemble the value piecewise into a temporary.  */
9170 int i, prev_size = 0;
9171 tree temp = create_tmp_var (type, "va_arg_tmp");
9174 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
9175 gimplify_assign (addr, t, pre_p);
9177 for (i = 0; i < XVECLEN (container, 0); i++)
9179 rtx slot = XVECEXP (container, 0, i);
9180 rtx reg = XEXP (slot, 0);
9181 machine_mode mode = GET_MODE (reg);
9187 tree dest_addr, dest;
9188 int cur_size = GET_MODE_SIZE (mode);
9190 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
9191 prev_size = INTVAL (XEXP (slot, 1));
/* The last piece may extend past the value; shrink it to an
   integer mode of exactly the remaining size.  */
9192 if (prev_size + cur_size > size)
9194 cur_size = size - prev_size;
9195 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
9196 if (mode == BLKmode)
9199 piece_type = lang_hooks.types.type_for_mode (mode, 1);
9200 if (mode == GET_MODE (reg))
9201 addr_type = build_pointer_type (piece_type);
9203 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9205 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
/* SSE pieces come from the 16-byte-strided FP save slots, integer
   pieces from the 8-byte GP save slots.  */
9208 if (SSE_REGNO_P (REGNO (reg)))
9210 src_addr = sse_addr;
9211 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
9215 src_addr = int_addr;
9216 src_offset = REGNO (reg) * 8;
9218 src_addr = fold_convert (addr_type, src_addr);
9219 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
9221 dest_addr = fold_convert (daddr_type, addr);
9222 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
9223 if (cur_size == GET_MODE_SIZE (mode))
9225 src = build_va_arg_indirect_ref (src_addr);
9226 dest = build_va_arg_indirect_ref (dest_addr);
9228 gimplify_assign (dest, src, pre_p);
/* Partial piece: fall back to a memcpy of cur_size bytes.  */
9233 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
9234 3, dest_addr, src_addr,
9235 size_int (cur_size));
9236 gimplify_and_add (copy, pre_p);
9238 prev_size += cur_size;
/* Consume the register slots just used.  */
9244 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
9245 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
9246 gimplify_assign (gpr, t, pre_p);
9251 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
9252 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
9253 gimplify_assign (fpr, t, pre_p);
9256 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9258 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9261 /* ... otherwise out of the overflow area.  */
9263 /* When we align parameter on stack for caller, if the parameter
9264 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
9265 aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
9266 here with caller.  */
9267 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
9268 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
9269 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
9271 /* Care for on-stack alignment if needed.  */
9272 if (arg_boundary <= 64 || size == 0)
/* Round the overflow pointer up to the argument's alignment.  */
9276 HOST_WIDE_INT align = arg_boundary / 8;
9277 t = fold_build_pointer_plus_hwi (ovf, align - 1);
9278 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9279 build_int_cst (TREE_TYPE (t), -align));
9282 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9283 gimplify_assign (addr, t, pre_p);
/* Advance the overflow pointer past this argument.  */
9285 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
9286 gimplify_assign (unshare_expr (ovf), t, pre_p);
9289 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9291 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
9292 addr = fold_convert (ptrtype, addr);
/* For by-reference arguments, an extra dereference yields the value.  */
9295 addr = build_va_arg_indirect_ref (addr);
9296 return build_va_arg_indirect_ref (addr);
9299 /* Return true if OPNUM's MEM should be matched
9300 in movabs* patterns.  */
9303 ix86_check_movabs (rtx insn, int opnum)
/* Dig the SET out of INSN (first element if it is a PARALLEL) and
   fetch operand OPNUM -- 0 is the destination, 1 the source.  */
9307 set = PATTERN (insn);
9308 if (GET_CODE (set) == PARALLEL)
9309 set = XVECEXP (set, 0, 0);
9310 gcc_assert (GET_CODE (set) == SET);
9311 mem = XEXP (set, opnum);
/* Look through any SUBREGs wrapping the MEM.  */
9312 while (GET_CODE (mem) == SUBREG)
9313 mem = SUBREG_REG (mem);
9314 gcc_assert (MEM_P (mem));
/* Volatile memory is acceptable only when volatile_ok is set.  */
9315 return volatile_ok || !MEM_VOLATILE_P (mem);
9318 /* Initialize the table of extra 80387 mathematical constants: the
   values loadable by the fldlg2/fldln2/fldl2e/fldl2t/fldpi
   instructions.  Runs once; guarded by ext_80387_constants_init.  */
9321 init_ext_80387_constants (void)
9323 static const char * cst[5] =
9325 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
9326 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
9327 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
9328 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
9329 "3.1415926535897932385128089594061862044", /* 4: fldpi */
9333 for (i = 0; i < 5; i++)
9335 real_from_string (&ext_80387_constants_table[i], cst[i]);
9336 /* Ensure each constant is rounded to XFmode precision.  */
9337 real_convert (&ext_80387_constants_table[i],
9338 XFmode, &ext_80387_constants_table[i]);
/* Mark the table as filled so subsequent callers skip the work.  */
9341 ext_80387_constants_init = 1;
9344 /* Return non-zero if the constant is something that
9345 can be loaded with a special instruction.
   NOTE(review): the specific return codes (and the return statements
   for several cases) are elided in this excerpt.  */
9348 standard_80387_constant_p (rtx x)
9350 machine_mode mode = GET_MODE (x);
/* Only x87 floating-point CONST_DOUBLEs qualify.  */
9354 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
9357 if (x == CONST0_RTX (mode))
9359 if (x == CONST1_RTX (mode))
9362 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9364 /* For XFmode constants, try to find a special 80387 instruction when
9365 optimizing for size or on those CPUs that benefit from them.  */
9367 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)
9371 if (! ext_80387_constants_init)
9372 init_ext_80387_constants ();
/* Scan the five-entry table built by init_ext_80387_constants.  */
9374 for (i = 0; i < 5; i++)
9375 if (real_identical (&r, &ext_80387_constants_table[i]))
9379 /* Load of the constant -0.0 or -1.0 will be split as
9380 fldz;fchs or fld1;fchs sequence.  */
9381 if (real_isnegzero (&r))
9383 if (real_identical (&r, &dconstm1))
9389 /* Return the opcode of the special instruction to be used to load
   constant X.  Dispatches on standard_80387_constant_p (X); the
   switch cases mapping codes to opcode strings are elided in this
   excerpt.  */
9393 standard_80387_constant_opcode (rtx x)
9395 switch (standard_80387_constant_p (x))
9419 /* Return the CONST_DOUBLE representing the 80387 constant that is
9420 loaded by the specified special instruction.  The argument IDX
9421 matches the return value from standard_80387_constant_p.
   NOTE(review): the switch translating IDX to table index I is
   elided in this excerpt.  */
9424 standard_80387_constant_rtx (int idx)
/* Lazily build the constants table on first use.  */
9428 if (! ext_80387_constants_init)
9429 init_ext_80387_constants ();
9445 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9449 /* Return 1 if X is all 0s and 2 if x is all 1s
9450 in supported SSE/AVX vector mode.
   NOTE(review): the return statements and the mode checks gating the
   all-ones case are elided in this excerpt.  */
9453 standard_sse_constant_p (rtx x)
9455 machine_mode mode = GET_MODE (x);
9457 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
9459 if (vector_all_ones_operand (x, mode))
9487 /* Return the opcode of the special instruction to be used to load
   the all-zeros or all-ones SSE/AVX constant X into the destination
   of INSN.  The mnemonic depends on the insn's attribute mode and on
   the available ISA extensions (AVX-512 variants use the zmm/%g0 or
   xmm/%x0 forms).  NOTE(review): the outer switch labels and several
   case labels are elided in this excerpt.  */
9491 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9493 switch (standard_sse_constant_p (x))
/* All-zeros: pick an xor-zeroing idiom matching the insn mode.  */
9496 switch (get_attr_mode (insn))
9499 return "vpxord\t%g0, %g0, %g0";
9501 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9502 : "vpxord\t%g0, %g0, %g0";
9504 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9505 : "vpxorq\t%g0, %g0, %g0";
9507 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9508 : "%vpxor\t%0, %d0";
9510 return "%vxorpd\t%0, %d0";
9512 return "%vxorps\t%0, %d0";
9515 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9516 : "vpxor\t%x0, %x0, %x0";
9518 return "vxorpd\t%x0, %x0, %x0";
9520 return "vxorps\t%x0, %x0, %x0";
/* All-ones: vpternlogd for 512-bit, pcmpeqd otherwise.  */
9528 || get_attr_mode (insn) == MODE_XI
9529 || get_attr_mode (insn) == MODE_V8DF
9530 || get_attr_mode (insn) == MODE_V16SF)
9531 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9533 return "vpcmpeqd\t%0, %0, %0";
9535 return "pcmpeqd\t%0, %0";
9543 /* Returns true if OP contains a symbol reference, searching the RTL
   expression recursively.  */
9546 symbolic_reference_mentioned_p (rtx op)
9551 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
/* Walk every sub-rtx; 'E' operands are vectors, 'e' are single
   expressions.  */
9554 fmt = GET_RTX_FORMAT (GET_CODE (op));
9555 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9561 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9562 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9566 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9573 /* Return true if it is appropriate to emit `ret' instructions in the
9574 body of a function.  Do this only if the epilogue is simple, needing a
9575 couple of insns.  Prior to reloading, we can't tell how many registers
9576 must be saved, so return false then.  Return false if there is no frame
9577 marker to de-allocate.  */
9580 ix86_can_use_return_insn_p (void)
9582 struct ix86_frame frame;
9584 if (! reload_completed || frame_pointer_needed)
9587 /* Don't allow more than 32k pop, since that's all we can do
9588 with one instruction.  */
9589 if (crtl->args.pops_args && crtl->args.size >= 32768)
/* A bare `ret' works only if the frame is just the return address
   (stack pointer offset of one word) and no registers were saved.  */
9592 ix86_compute_frame_layout (&frame);
9593 return (frame.stack_pointer_offset == UNITS_PER_WORD
9594 && (frame.nregs + frame.nsseregs) == 0);
9597 /* Value should be nonzero if functions must have frame pointers.
9598 Zero means the frame pointer need not be set up (and parms may
9599 be accessed via the stack pointer) in functions that seem suitable.  */
9602 ix86_frame_pointer_required (void)
9604 /* If we accessed previous frames, then the generated code expects
9605 to be able to access the saved ebp value in our frame.  */
9606 if (cfun->machine->accesses_prev_frame)
9609 /* Several x86 os'es need a frame pointer for other reasons,
9610 usually pertaining to setjmp.  */
9611 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9614 /* For older 32-bit runtimes setjmp requires valid frame-pointer.  */
9615 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9618 /* Win64 SEH, very large frames need a frame-pointer as maximum stack
9619 allocation is 4GB.  */
9620 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9623 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9624 turns off the frame pointer by default.  Turn it back on now if
9625 we've not got a leaf function.  */
9626 if (TARGET_OMIT_LEAF_FRAME_POINTER
9628 || ix86_current_function_calls_tls_descriptor))
/* Profiling without -mfentry emits a mcount call that needs a frame.  */
9631 if (crtl->profile && !flag_fentry)
9637 /* Record that the current function accesses previous call frames.
   This forces ix86_frame_pointer_required to return true.  */
9640 ix86_setup_frame_addresses (void)
9642 cfun->machine->accesses_prev_frame = 1;
/* USE_HIDDEN_LINKONCE: emit pc thunks as hidden COMDAT functions when
   the assembler/linker support it, otherwise as plain local labels.  */
9645 #ifndef USE_HIDDEN_LINKONCE
9646 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9647 # define USE_HIDDEN_LINKONCE 1
9649 # define USE_HIDDEN_LINKONCE 0
/* Bitmask of registers for which a pc thunk has been requested; bit N
   set means the thunk loading register N must be emitted at code end.  */
9653 static int pic_labels_used;
9655 /* Fills in the label name that should be used for a pc thunk for
9656 the given register.  NAME must have room for 32 characters.  */
9659 get_pc_thunk_name (char name[32], unsigned int regno)
/* pc thunks are a 32-bit PIC mechanism only.  */
9661 gcc_assert (!TARGET_64BIT);
9663 if (USE_HIDDEN_LINKONCE)
9664 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9666 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9670 /* This function generates code for -fpic that loads %ebx with
9671 the return address of the caller and then returns.  Implements
   TARGET_ASM_CODE_END: emits one get_pc thunk per register recorded
   in pic_labels_used.  */
9674 ix86_code_end (void)
9679 for (regno = AX_REG; regno <= SP_REG; regno++)
/* Skip registers for which no thunk was requested.  */
9684 if (!(pic_labels_used & (1 << regno)))
9687 get_pc_thunk_name (name, regno);
/* Build a minimal FUNCTION_DECL so debug/unwind machinery treats
   the thunk as a proper function.  */
9689 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9690 get_identifier (name),
9691 build_function_type_list (void_type_node, NULL_TREE));
9692 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9693 NULL_TREE, void_type_node);
9694 TREE_PUBLIC (decl) = 1;
9695 TREE_STATIC (decl) = 1;
9696 DECL_IGNORED_P (decl) = 1;
/* Darwin path: weak definition in the coalesced text section.  */
9701 switch_to_section (darwin_sections[text_coal_section]);
9702 fputs ("\t.weak_definition\t", asm_out_file);
9703 assemble_name (asm_out_file, name);
9704 fputs ("\n\t.private_extern\t", asm_out_file);
9705 assemble_name (asm_out_file, name);
9706 putc ('\n', asm_out_file);
9707 ASM_OUTPUT_LABEL (asm_out_file, name);
9708 DECL_WEAK (decl) = 1;
9712 if (USE_HIDDEN_LINKONCE)
/* ELF path: hidden COMDAT in its own section so duplicate thunks
   across translation units are merged by the linker.  */
9714 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9716 targetm.asm_out.unique_section (decl, 0);
9717 switch_to_section (get_named_section (decl, NULL, 0));
9719 targetm.asm_out.globalize_label (asm_out_file, name);
9720 fputs ("\t.hidden\t", asm_out_file);
9721 assemble_name (asm_out_file, name);
9722 putc ('\n', asm_out_file);
9723 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9727 switch_to_section (text_section);
9728 ASM_OUTPUT_LABEL (asm_out_file, name);
9731 DECL_INITIAL (decl) = make_node (BLOCK);
9732 current_function_decl = decl;
9733 init_function_start (decl);
9734 first_function_block_is_cold = false;
9735 /* Make sure unwind info is emitted for the thunk if needed.  */
9736 final_start_function (emit_barrier (), asm_out_file, 1);
9738 /* Pad stack IP move with 4 instructions (two NOPs count
9739 as one instruction).  */
9740 if (TARGET_PAD_SHORT_FUNCTION)
9745 fputs ("\tnop\n", asm_out_file);
/* The thunk body: load the return address from the stack into the
   target register, then return.  */
9748 xops[0] = gen_rtx_REG (Pmode, regno);
9749 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9750 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9751 output_asm_insn ("%!ret", NULL);
9752 final_end_function ();
9753 init_insn_lengths ();
9754 free_after_compilation (cfun);
9756 current_function_decl = NULL;
9759 if (flag_split_stack)
9760 file_end_indicate_split_stack ();
9763 /* Emit code for the SET_GOT patterns: load the GOT base address into
   DEST.  LABEL, when non-NULL, is the pc label to (re)use.
   NOTE(review): some lines (returns, else-branches, Darwin #ifdefs)
   are elided in this excerpt.  */
9766 output_set_got (rtx dest, rtx label)
9772 if (TARGET_VXWORKS_RTP && flag_pic)
9774 /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
9775 xops[2] = gen_rtx_MEM (Pmode,
9776 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9777 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9779 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9780 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9781 an unadorned address.  */
9782 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9783 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9784 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9788 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9793 /* We don't need a pic base, we're not producing pic.  */
/* Non-PIC: just move the address of a fresh (or given) label.  */
9796 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9797 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9798 targetm.asm_out.internal_label (asm_out_file, "L",
9799 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
/* PIC: call the per-register get_pc thunk; record that the thunk
   body must be emitted later by ix86_code_end.  */
9804 get_pc_thunk_name (name, REGNO (dest));
9805 pic_labels_used |= 1 << REGNO (dest);
9807 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9808 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9809 output_asm_insn ("%!call\t%X2", xops);
9812 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9813 This is what will be referenced by the Mach-O PIC subsystem.  */
9814 if (machopic_should_output_picbase_label () || !label)
9815 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9817 /* When we are restoring the pic base at the site of a nonlocal label,
9818 and we decided to emit the pic base above, we will still output a
9819 local label used for calculating the correction offset (even though
9820 the offset will be 0 in that case).  */
9822 targetm.asm_out.internal_label (asm_out_file, "L",
9823 CODE_LABEL_NUMBER (label));
/* Finally add the GOT offset (relative to the pc label) to DEST.  */
9828 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
/* NOTE(review): fragment — the function header and closing lines are
   missing from this dump.  */
9833 /* Generate an "push" pattern for input ARG. */
/* Side effect: updates the frame-state model (m->fs) to account for
   the word the push will consume, so later CFA bookkeeping is right.  */
9838 struct machine_function *m = cfun->machine;
9840 if (m->fs.cfa_reg == stack_pointer_rtx)
9841 m->fs.cfa_offset += UNITS_PER_WORD;
9842 m->fs.sp_offset += UNITS_PER_WORD;
/* Pushes are always word-sized; widen a narrower hard register.  */
9844 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9845 arg = gen_rtx_REG (word_mode, REGNO (arg));
/* Returned pattern: mem[--sp] = arg (final operand line stripped).  */
9847 return gen_rtx_SET (VOIDmode,
9848 gen_rtx_MEM (word_mode,
9849 gen_rtx_PRE_DEC (Pmode,
9850 stack_pointer_rtx)),
/* NOTE(review): fragment — header and the first SET operand line
   (original 9863, presumably "arg,") are missing from this dump.  */
9854 /* Generate an "pop" pattern for input ARG. */
/* Unlike gen_push above, no frame-state update happens here —
   presumably the caller accounts for the stack adjustment; verify
   against the full source.  */
9859 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9860 arg = gen_rtx_REG (word_mode, REGNO (arg));
/* Returned pattern: arg = mem[sp++].  */
9862 return gen_rtx_SET (VOIDmode,
9864 gen_rtx_MEM (word_mode,
9865 gen_rtx_POST_INC (Pmode,
9866 stack_pointer_rtx)));
9869 /* Return >= 0 if there is an unused call-clobbered register available
9870 for the entire function. */
/* NOTE(review): fragment — guard conditions between 9876 and 9883 are
   stripped; only the skeleton is visible.  */
9873 ix86_select_alt_pic_regnum (void)
/* With a pseudo PIC register there is no fixed hard reg to replace.  */
9875 if (ix86_use_pseudo_pic_reg ())
9876 return INVALID_REGNUM;
9880 && !ix86_current_function_calls_tls_descriptor)
9883 /* Can't use the same register for both PIC and DRAP. */
9885 drap = REGNO (crtl->drap_reg);
/* Scan eax/ecx/edx (regnos 2..0) for one never live in this function.  */
9888 for (i = 2; i >= 0; --i)
9889 if (i != drap && !df_regs_ever_live_p (i))
9893 return INVALID_REGNUM;
9896 /* Return TRUE if we need to save REGNO. */
/* MAYBE_EH_RETURN selects whether the EH-return data registers are
   considered (loop body partly stripped below).  */
9899 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
/* Special-case the PIC register: decide based on how PIC is set up.  */
9901 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
9902 && pic_offset_table_rtx)
9904 if (ix86_use_pseudo_pic_reg ())
9906 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
9907 _mcount in prologue. */
9908 if (!TARGET_64BIT && flag_pic && crtl->profile)
9911 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9913 || crtl->calls_eh_return
9914 || crtl->uses_const_pool
9915 || cfun->has_nonlocal_label)
/* Only save it if no alternate call-clobbered register can stand in.  */
9916 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
/* EH return: the EH data registers must be preserved (loop interior
   stripped in this dump).  */
9919 if (crtl->calls_eh_return && maybe_eh_return)
9924 unsigned test = EH_RETURN_DATA_REGNO (i);
9925 if (test == INVALID_REGNUM)
/* The DRAP register must be saved unless explicitly suppressed.  */
9933 && regno == REGNO (crtl->drap_reg)
9934 && !cfun->machine->no_drap_save_restore)
/* General case: live, callee-saved, not fixed, and not the frame
   pointer when a frame pointer is in use.  */
9937 return (df_regs_ever_live_p (regno)
9938 && !call_used_regs[regno]
9939 && !fixed_regs[regno]
9940 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9943 /* Return number of saved general prupose registers. */
/* (sic: "prupose" — typo preserved from upstream.)  Counts hard regs
   that ix86_save_reg says must be saved; accumulator line stripped.  */
9946 ix86_nsaved_regs (void)
9951 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9952 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9957 /* Return number of saved SSE registrers. */
/* (sic: "registrers".)  Only the 64-bit MS ABI saves SSE regs; the
   early-return body after this test is stripped from the dump.  */
9960 ix86_nsaved_sseregs (void)
9965 if (!TARGET_64BIT_MS_ABI)
9967 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9968 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9973 /* Given FROM and TO register numbers, say whether this elimination is
9974 allowed. If stack alignment is needed, we can only replace argument
9975 pointer with hard frame pointer, or replace frame pointer with stack
9976 pointer. Otherwise, frame pointer elimination is automatically
9977 handled and all other eliminations are valid. */
/* Implements TARGET_CAN_ELIMINATE for the register-elimination pass.  */
9980 ix86_can_eliminate (const int from, const int to)
/* Under -fstackrealign via frame pointer only the two listed pairs
   are meaningful; everything else must be rejected.  */
9982 if (stack_realign_fp)
9983 return ((from == ARG_POINTER_REGNUM
9984 && to == HARD_FRAME_POINTER_REGNUM)
9985 || (from == FRAME_POINTER_REGNUM
9986 && to == STACK_POINTER_REGNUM));
/* Eliminating to SP is only valid when no frame pointer is required.  */
9988 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9991 /* Return the offset between two registers, one to be eliminated, and the other
9992 its replacement, at the start of a routine. */
/* Delegates all layout math to ix86_compute_frame_layout and reads the
   relevant offsets out of the filled-in frame description.  */
9995 ix86_initial_elimination_offset (int from, int to)
9997 struct ix86_frame frame;
9998 ix86_compute_frame_layout (&frame);
10000 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10001 return frame.hard_frame_pointer_offset;
10002 else if (from == FRAME_POINTER_REGNUM
10003 && to == HARD_FRAME_POINTER_REGNUM)
10004 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
/* Remaining cases eliminate to the stack pointer; anything else is a
   caller bug.  */
10007 gcc_assert (to == STACK_POINTER_REGNUM);
10009 if (from == ARG_POINTER_REGNUM)
10010 return frame.stack_pointer_offset;
10012 gcc_assert (from == FRAME_POINTER_REGNUM);
10013 return frame.stack_pointer_offset - frame.frame_pointer_offset;
10017 /* In a dynamically-aligned function, we can't know the offset from
10018 stack pointer to frame pointer, so we must ensure that setjmp
10019 eliminates fp against the hard fp (%ebp) rather than trying to
10020 index from %esp up to the top of the frame across a gap that is
10021 of unknown (at compile-time) size. */
/* Target hook: value recorded by __builtin_setjmp as the frame base.  */
10023 ix86_builtin_setjmp_frame_value (void)
10025 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
10028 /* When using -fsplit-stack, the allocation routines set a field in
10029 the TCB to the bottom of the stack plus this much space, measured
10032 #define SPLIT_STACK_AVAILABLE 256
10034 /* Fill structure ix86_frame about frame of currently computed function. */
/* NOTE(review): large fragment — many guard lines (notably the SEH and
   else-branches) are stripped from this dump; the control structure is
   partly inferred and should be checked against the full source.
   Overall flow: count saved regs, settle stack alignment, decide
   push-vs-move register saving, then walk the frame top-down
   accumulating OFFSET and recording each area's offset in *FRAME.  */
10037 ix86_compute_frame_layout (struct ix86_frame *frame)
10039 unsigned HOST_WIDE_INT stack_alignment_needed;
10040 HOST_WIDE_INT offset;
10041 unsigned HOST_WIDE_INT preferred_alignment;
10042 HOST_WIDE_INT size = get_frame_size ();
10043 HOST_WIDE_INT to_allocate;
10045 frame->nregs = ix86_nsaved_regs ();
10046 frame->nsseregs = ix86_nsaved_sseregs ();
10048 /* 64-bit MS ABI seem to require stack alignment to be always 16 except for
10049 function prologues and leaf. */
10050 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
10051 && (!crtl->is_leaf || cfun->calls_alloca != 0
10052 || ix86_current_function_calls_tls_descriptor))
10054 crtl->preferred_stack_boundary = 128;
10055 crtl->stack_alignment_needed = 128;
10057 /* preferred_stack_boundary is never updated for call
10058 expanded from tls descriptor. Update it here. We don't update it in
10059 expand stage because according to the comments before
10060 ix86_current_function_calls_tls_descriptor, tls calls may be optimized
10062 else if (ix86_current_function_calls_tls_descriptor
10063 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
10065 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
10066 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
10067 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
/* Convert alignments from bits to bytes for the offset math below.  */
10070 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
10071 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
10073 gcc_assert (!size || stack_alignment_needed);
10074 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
10075 gcc_assert (preferred_alignment <= stack_alignment_needed);
10077 /* For SEH we have to limit the amount of code movement into the prologue.
10078 At present we do this via a BLOCKAGE, at which point there's very little
10079 scheduling that can be done, which means that there's very little point
10080 in doing anything except PUSHs. */
10082 cfun->machine->use_fast_prologue_epilogue = false;
10084 /* During reload iteration the amount of registers saved can change.
10085 Recompute the value as needed. Do not recompute when amount of registers
10086 didn't change as reload does multiple calls to the function and does not
10087 expect the decision to change within single iteration. */
10088 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
10089 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
10091 int count = frame->nregs;
10092 struct cgraph_node *node = cgraph_node::get (current_function_decl);
10094 cfun->machine->use_fast_prologue_epilogue_nregs = count;
10096 /* The fast prologue uses move instead of push to save registers. This
10097 is significantly longer, but also executes faster as modern hardware
10098 can execute the moves in parallel, but can't do that for push/pop.
10100 Be careful about choosing what prologue to emit: When function takes
10101 many instructions to execute we may use slow version as well as in
10102 case function is known to be outside hot spot (this is known with
10103 feedback only). Weight the size of function by number of registers
10104 to save as it is cheap to use one or two push instructions but very
10105 slow to use many of them. */
10107 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
10108 if (node->frequency < NODE_FREQUENCY_NORMAL
10109 || (flag_branch_probabilities
10110 && node->frequency < NODE_FREQUENCY_HOT))
10111 cfun->machine->use_fast_prologue_epilogue = false;
10113 cfun->machine->use_fast_prologue_epilogue
10114 = !expensive_function_p (count);
10117 frame->save_regs_using_mov
10118 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
10119 /* If static stack checking is enabled and done with probes,
10120 the registers need to be saved before allocating the frame. */
10121 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
/* --- Frame layout proper starts here; OFFSET grows downward from the
   return address toward the stack pointer.  --- */
10123 /* Skip return address. */
10124 offset = UNITS_PER_WORD;
10126 /* Skip pushed static chain. */
10127 if (ix86_static_chain_on_stack)
10128 offset += UNITS_PER_WORD;
10130 /* Skip saved base pointer. */
10131 if (frame_pointer_needed)
10132 offset += UNITS_PER_WORD;
10133 frame->hfp_save_offset = offset;
10135 /* The traditional frame pointer location is at the top of the frame. */
10136 frame->hard_frame_pointer_offset = offset;
10138 /* Register save area */
10139 offset += frame->nregs * UNITS_PER_WORD;
10140 frame->reg_save_offset = offset;
10142 /* On SEH target, registers are pushed just before the frame pointer
10145 frame->hard_frame_pointer_offset = offset;
10147 /* Align and set SSE register save area. */
10148 if (frame->nsseregs)
10150 /* The only ABI that has saved SSE registers (Win64) also has a
10151 16-byte aligned default stack, and thus we don't need to be
10152 within the re-aligned local stack frame to save them. */
10153 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
10154 offset = (offset + 16 - 1) & -16;
10155 offset += frame->nsseregs * 16;
10157 frame->sse_reg_save_offset = offset;
10159 /* The re-aligned stack starts here. Values before this point are not
10160 directly comparable with values below this point. In order to make
10161 sure that no value happens to be the same before and after, force
10162 the alignment computation below to add a non-zero value. */
10163 if (stack_realign_fp)
10164 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
10167 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
10168 offset += frame->va_arg_size;
10170 /* Align start of frame for local function. */
10171 if (stack_realign_fp
10172 || offset != frame->sse_reg_save_offset
10175 || cfun->calls_alloca
10176 || ix86_current_function_calls_tls_descriptor)
10177 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
10179 /* Frame pointer points here. */
10180 frame->frame_pointer_offset = offset;
10184 /* Add outgoing arguments area. Can be skipped if we eliminated
10185 all the function calls as dead code.
10186 Skipping is however impossible when function calls alloca. Alloca
10187 expander assumes that last crtl->outgoing_args_size
10188 of stack frame are unused. */
10189 if (ACCUMULATE_OUTGOING_ARGS
10190 && (!crtl->is_leaf || cfun->calls_alloca
10191 || ix86_current_function_calls_tls_descriptor))
10193 offset += crtl->outgoing_args_size;
10194 frame->outgoing_arguments_size = crtl->outgoing_args_size;
10197 frame->outgoing_arguments_size = 0;
10199 /* Align stack boundary. Only needed if we're calling another function
10200 or using alloca. */
10201 if (!crtl->is_leaf || cfun->calls_alloca
10202 || ix86_current_function_calls_tls_descriptor)
10203 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
10205 /* We've reached end of stack frame. */
10206 frame->stack_pointer_offset = offset;
10208 /* Size prologue needs to allocate. */
10209 to_allocate = offset - frame->sse_reg_save_offset;
/* Saving by moves is pointless (or impossible for >2GB frames on
   64-bit) — fall back to pushes.  */
10211 if ((!to_allocate && frame->nregs <= 1)
10212 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
10213 frame->save_regs_using_mov = false;
10215 if (ix86_using_red_zone ()
10216 && crtl->sp_is_unchanging
10218 && !ix86_current_function_calls_tls_descriptor)
10220 frame->red_zone_size = to_allocate;
10221 if (frame->save_regs_using_mov)
10222 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
10223 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
10224 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
10227 frame->red_zone_size = 0;
10228 frame->stack_pointer_offset -= frame->red_zone_size;
10230 /* The SEH frame pointer location is near the bottom of the frame.
10231 This is enforced by the fact that the difference between the
10232 stack pointer and the frame pointer is limited to 240 bytes in
10233 the unwind data structure. */
10236 HOST_WIDE_INT diff;
10238 /* If we can leave the frame pointer where it is, do so. Also, returns
10239 the establisher frame for __builtin_frame_address (0). */
10240 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
10241 if (diff <= SEH_MAX_FRAME_SIZE
10242 && (diff > 240 || (diff & 15) != 0)
10243 && !crtl->accesses_prior_frames)
10245 /* Ideally we'd determine what portion of the local stack frame
10246 (within the constraint of the lowest 240) is most heavily used.
10247 But without that complication, simply bias the frame pointer
10248 by 128 bytes so as to maximize the amount of the local stack
10249 frame that is addressable with 8-bit offsets. */
10250 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
10255 /* This is semi-inlined memory_address_length, but simplified
10256 since we know that we're always dealing with reg+offset, and
10257 to avoid having to create and discard all that rtl. */
/* NOTE(review): fragment — the zero-offset guard and the wider-offset
   branches are stripped; returns an x86 encoding length estimate for
   addressing REGNO+OFFSET (smaller is cheaper).  */
10260 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
10266 /* EBP and R13 cannot be encoded without an offset. */
10267 len = (regno == BP_REG || regno == R13_REG);
/* disp8 form is available for offsets in [-128, 127].  */
10269 else if (IN_RANGE (offset, -128, 127))
10272 /* ESP and R12 must be encoded with a SIB byte. */
10273 if (regno == SP_REG || regno == R12_REG)
10279 /* Return an RTX that points to CFA_OFFSET within the stack frame.
10280 The valid base registers are taken from CFUN->MACHINE->FS. */
/* Two strategies: fast prologue/epilogue prefers a schedulable base,
   otherwise pick whichever valid base yields the shortest encoding
   (comparison lines between candidates are stripped in this dump).  */
10283 choose_baseaddr (HOST_WIDE_INT cfa_offset)
10285 const struct machine_function *m = cfun->machine;
10286 rtx base_reg = NULL;
10287 HOST_WIDE_INT base_offset = 0;
10289 if (m->use_fast_prologue_epilogue)
10291 /* Choose the base register most likely to allow the most scheduling
10292 opportunities. Generally FP is valid throughout the function,
10293 while DRAP must be reloaded within the epilogue. But choose either
10294 over the SP due to increased encoding size. */
10296 if (m->fs.fp_valid)
10298 base_reg = hard_frame_pointer_rtx;
10299 base_offset = m->fs.fp_offset - cfa_offset;
10301 else if (m->fs.drap_valid)
10303 base_reg = crtl->drap_reg;
/* DRAP is defined to sit exactly at the CFA, hence offset 0 base.  */
10304 base_offset = 0 - cfa_offset;
10306 else if (m->fs.sp_valid)
10308 base_reg = stack_pointer_rtx;
10309 base_offset = m->fs.sp_offset - cfa_offset;
10314 HOST_WIDE_INT toffset;
/* LEN starts at an impossible maximum so any valid candidate wins.  */
10315 int len = 16, tlen;
10317 /* Choose the base register with the smallest address encoding.
10318 With a tie, choose FP > DRAP > SP. */
10319 if (m->fs.sp_valid)
10321 base_reg = stack_pointer_rtx;
10322 base_offset = m->fs.sp_offset - cfa_offset;
10323 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
10325 if (m->fs.drap_valid)
10327 toffset = 0 - cfa_offset;
10328 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
10331 base_reg = crtl->drap_reg;
10332 base_offset = toffset;
10336 if (m->fs.fp_valid)
10338 toffset = m->fs.fp_offset - cfa_offset;
10339 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
10342 base_reg = hard_frame_pointer_rtx;
10343 base_offset = toffset;
/* At least one base must be valid at any point we're called.  */
10348 gcc_assert (base_reg != NULL);
10350 return plus_constant (Pmode, base_reg, base_offset);
10353 /* Emit code to save registers in the prologue. */
/* Uses PUSH insns, iterating from high regnos down so the save order
   matches the push-based frame layout; each push is frame-related for
   the unwinder.  */
10356 ix86_emit_save_regs (void)
10358 unsigned int regno;
10361 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
10362 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10364 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
10365 RTX_FRAME_RELATED_P (insn) = 1;
10369 /* Emit a single register save at CFA - CFA_OFFSET. */
/* MODE is word_mode for GPRs or V4SFmode for SSE saves.  Besides the
   store itself, attaches whichever REG_CFA_* note dwarf2out needs when
   the chosen base register differs from the current CFA register or
   the frame has been re-aligned.  */
10372 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
10373 HOST_WIDE_INT cfa_offset)
10375 struct machine_function *m = cfun->machine;
10376 rtx reg = gen_rtx_REG (mode, regno);
10377 rtx mem, addr, base, insn;
10379 addr = choose_baseaddr (cfa_offset);
10380 mem = gen_frame_mem (mode, addr);
10382 /* For SSE saves, we need to indicate the 128-bit alignment. */
10383 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
10385 insn = emit_move_insn (mem, reg);
10386 RTX_FRAME_RELATED_P (insn) = 1;
/* Strip a PLUS to recover the bare base register of the address
   (assignment of BASE is on a stripped line — presumably base = addr).  */
10389 if (GET_CODE (base) == PLUS)
10390 base = XEXP (base, 0);
10391 gcc_checking_assert (REG_P (base));
10393 /* When saving registers into a re-aligned local stack frame, avoid
10394 any tricky guessing by dwarf2out. */
10395 if (m->fs.realigned)
10397 gcc_checking_assert (stack_realign_drap);
10399 if (regno == REGNO (crtl->drap_reg))
10401 /* A bit of a hack. We force the DRAP register to be saved in
10402 the re-aligned stack frame, which provides us with a copy
10403 of the CFA that will last past the prologue. Install it. */
10404 gcc_checking_assert (cfun->machine->fs.fp_valid);
10405 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10406 cfun->machine->fs.fp_offset - cfa_offset);
10407 mem = gen_rtx_MEM (mode, addr);
10408 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
10412 /* The frame pointer is a stable reference within the
10413 aligned frame. Use it. */
10414 gcc_checking_assert (cfun->machine->fs.fp_valid);
10415 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10416 cfun->machine->fs.fp_offset - cfa_offset);
10417 mem = gen_rtx_MEM (mode, addr);
10418 add_reg_note (insn, REG_CFA_EXPRESSION,
10419 gen_rtx_SET (VOIDmode, mem, reg));
10423 /* The memory may not be relative to the current CFA register,
10424 which means that we may need to generate a new pattern for
10425 use by the unwind info. */
10426 else if (base != m->fs.cfa_reg)
10428 addr = plus_constant (Pmode, m->fs.cfa_reg,
10429 m->fs.cfa_offset - cfa_offset);
10430 mem = gen_rtx_MEM (mode, addr);
10431 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
10435 /* Emit code to save registers using MOV insns.
10436 First register is stored at CFA - CFA_OFFSET. */
/* GPR variant: one word-sized slot per saved register, walking
   CFA_OFFSET downward.  */
10438 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10440 unsigned int regno;
10442 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10443 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10445 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10446 cfa_offset -= UNITS_PER_WORD;
10450 /* Emit code to save SSE registers using MOV insns.
10451 First register is stored at CFA - CFA_OFFSET. */
/* SSE variant: V4SFmode (16-byte) slots; the offset-decrement line is
   stripped from this dump but presumably steps by 16 — verify.  */
10453 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10455 unsigned int regno;
10457 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10458 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10460 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
/* GC-rooted list of REG_CFA_RESTORE notes waiting to be attached to
   the next stack-manipulation insn.  */
10465 static GTY(()) rtx queued_cfa_restores;
10467 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
10468 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
10469 Don't add the note if the previously saved value will be left untouched
10470 within stack red-zone till return, as unwinders can find the same value
10471 in the register and on the stack. */
10474 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
/* Red-zone short-circuit: slot survives to return, note is unneeded
   (the early return on the stripped line presumably follows).  */
10476 if (!crtl->shrink_wrapped
10477 && cfa_offset <= cfun->machine->fs.red_zone_offset)
/* INSN given: attach directly; otherwise queue for the next stack insn.  */
10482 add_reg_note (insn, REG_CFA_RESTORE, reg);
10483 RTX_FRAME_RELATED_P (insn) = 1;
10486 queued_cfa_restores
10487 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
10490 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
/* Splices the whole queued list onto INSN's note chain in one step by
   linking the queue's tail to the existing notes, then clears the queue.  */
10493 ix86_add_queued_cfa_restore_notes (rtx insn)
10496 if (!queued_cfa_restores)
/* Walk to the last queued note so we can append INSN's current notes.  */
10498 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10500 XEXP (last, 1) = REG_NOTES (insn);
10501 REG_NOTES (insn) = queued_cfa_restores;
10502 queued_cfa_restores = NULL_RTX;
10503 RTX_FRAME_RELATED_P (insn) = 1;
10506 /* Expand prologue or epilogue stack adjustment.
10507 The pattern exist to put a dependency on all ebp-based memory accesses.
10508 STYLE should be negative if instructions should be marked as frame related,
10509 zero if %r11 register is live and cannot be freely used and positive
/* (Continuation of the comment is stripped; presumably "positive
   otherwise".)  SET_CFA requests CFA tracking via REG_CFA_ADJUST_CFA.
   After emitting, the function also updates the in-memory frame-state
   model (m->fs) to mirror the adjustment.  */
10513 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10514 int style, bool set_cfa)
10516 struct machine_function *m = cfun->machine;
10518 bool add_frame_related_expr = false;
10520 if (Pmode == SImode)
10521 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10522 else if (x86_64_immediate_operand (offset, DImode))
10523 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
/* 64-bit offset too large for an immediate: stage it through a
   scratch register (r11, or the frame pointer when r11 is live).  */
10527 /* r11 is used by indirect sibcall return as well, set before the
10528 epilogue and used after the epilogue. */
10530 tmp = gen_rtx_REG (DImode, R11_REG);
10533 gcc_assert (src != hard_frame_pointer_rtx
10534 && dest != hard_frame_pointer_rtx);
10535 tmp = hard_frame_pointer_rtx;
10537 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10539 add_frame_related_expr = true;
10541 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10544 insn = emit_insn (insn);
10546 ix86_add_queued_cfa_restore_notes (insn);
/* set_cfa path (guard line stripped): record the CFA move explicitly.  */
10552 gcc_assert (m->fs.cfa_reg == src);
10553 m->fs.cfa_offset += INTVAL (offset);
10554 m->fs.cfa_reg = dest;
10556 r = gen_rtx_PLUS (Pmode, src, offset);
10557 r = gen_rtx_SET (VOIDmode, dest, r);
10558 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10559 RTX_FRAME_RELATED_P (insn) = 1;
10561 else if (style < 0)
10563 RTX_FRAME_RELATED_P (insn) = 1;
10564 if (add_frame_related_expr)
10566 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10567 r = gen_rtx_SET (VOIDmode, dest, r);
10568 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
/* Keep the frame-state model in sync when SP was the destination.  */
10572 if (dest == stack_pointer_rtx)
10574 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10575 bool valid = m->fs.sp_valid;
10577 if (src == hard_frame_pointer_rtx)
10579 valid = m->fs.fp_valid;
10580 ooffset = m->fs.fp_offset;
10582 else if (src == crtl->drap_reg)
10584 valid = m->fs.drap_valid;
10589 /* Else there are two possibilities: SP itself, which we set
10590 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
10591 taken care of this by hand along the eh_return path. */
10592 gcc_checking_assert (src == stack_pointer_rtx
10593 || offset == const0_rtx);
10596 m->fs.sp_offset = ooffset - INTVAL (offset);
10597 m->fs.sp_valid = valid;
10601 /* Find an available register to be used as dynamic realign argument
10602 pointer regsiter. Such a register will be written in prologue and
10603 used in begin of body, so it must not be
10604 1. parameter passing register.
/* (sic: "regsiter" — upstream typo; item 2 of the list is on a
   stripped line.)  */
10606 We reuse static-chain register if it is available. Otherwise, we
10607 use DI for i386 and R13 for x86-64. We chose R13 since it has
10610 Return: the regno of chosen register. */
10612 static unsigned int
10613 find_drap_reg (void)
10615 tree decl = cfun->decl;
/* 64-bit branch (TARGET_64BIT guard is on a stripped line).  */
10619 /* Use R13 for nested function or function need static chain.
10620 Since function with tail call may use any caller-saved
10621 registers in epilogue, DRAP must not use caller-saved
10622 register in such case. */
10623 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
/* 32-bit branch.  */
10630 /* Use DI for nested function or function need static chain.
10631 Since function with tail call may use any caller-saved
10632 registers in epilogue, DRAP must not use caller-saved
10633 register in such case. */
10634 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10637 /* Reuse static chain register if it isn't used for parameter
10639 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10641 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10642 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10651 /* Return minimum incoming stack alignment. */
/* SIBCALL selects the sibcall-check variant (affects the -mstackrealign
   special case below, whose guard line is partially stripped).  Each
   subsequent clause can only tighten or loosen the boundary in the
   documented direction.  */
10652 static unsigned int
10652 ix86_minimum_incoming_stack_boundary (bool sibcall)
10654 unsigned int incoming_stack_boundary;
10656 /* Prefer the one specified at command line. */
10657 if (ix86_user_incoming_stack_boundary)
10658 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10659 /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
10660 if -mstackrealign is used, it isn't used for sibcall check and
10661 estimated stack alignment is 128bit. */
10664 && ix86_force_align_arg_pointer
10665 && crtl->stack_alignment_estimated == 128)
10666 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10668 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10670 /* Incoming stack alignment can be changed on individual functions
10671 via force_align_arg_pointer attribute. We use the smallest
10672 incoming stack boundary. */
10673 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10674 && lookup_attribute (ix86_force_align_arg_pointer_string,
10675 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10676 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10678 /* The incoming stack frame has to be aligned at least at
10679 parm_stack_boundary. */
10680 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10681 incoming_stack_boundary = crtl->parm_stack_boundary;
10683 /* Stack at entrance of main is aligned by runtime. We use the
10684 smallest incoming stack boundary. */
10685 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10686 && DECL_NAME (current_function_decl)
10687 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10688 && DECL_FILE_SCOPE_P (current_function_decl))
10689 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10691 return incoming_stack_boundary;
10694 /* Update incoming stack boundary and estimated stack alignment. */
/* Implements TARGET_UPDATE_STACK_BOUNDARY.  The x86_64-varargs guard
   lines around the estimate bump are partially stripped.  */
10697 ix86_update_stack_boundary (void)
10699 ix86_incoming_stack_boundary
10700 = ix86_minimum_incoming_stack_boundary (false);
10702 /* x86_64 vararg needs 16byte stack alignment for register save
10706 && crtl->stack_alignment_estimated < 128)
10707 crtl->stack_alignment_estimated = 128;
10710 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10711 needed or an rtx for DRAP otherwise. */
10714 ix86_get_drap_rtx (void)
/* Force need_drap on when -mforce-drap is given or outgoing args are
   pushed rather than accumulated.  */
10716 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10717 crtl->need_drap = true;
10719 if (stack_realign_drap)
10721 /* Assign DRAP to vDRAP and returns vDRAP */
10722 unsigned int regno = find_drap_reg ();
10725 rtx_insn *seq, *insn;
10727 arg_ptr = gen_rtx_REG (Pmode, regno);
10728 crtl->drap_reg = arg_ptr;
/* Copy the hard DRAP into a pseudo at function entry (start_sequence /
   end_sequence lines are stripped from this dump).  */
10731 drap_vreg = copy_to_reg (arg_ptr);
10732 seq = get_insns ();
10735 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
/* Let dwarf2out track the virtual DRAP via REG_CFA_SET_VDRAP.  */
10738 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10739 RTX_FRAME_RELATED_P (insn) = 1;
10747 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
/* x86 always uses the virtual incoming-args pointer; DRAP handling
   happens elsewhere (see ix86_get_drap_rtx above).  */
10750 ix86_internal_arg_pointer (void)
10752 return virtual_incoming_args_rtx;
/* NOTE(review): struct members (the reg rtx and a saved/live flag,
   judging by uses below) are on stripped lines.  */
10755 struct scratch_reg {
10760 /* Return a short-lived scratch register for use on function entry.
10761 In 32-bit mode, it is valid only after the registers are saved
10762 in the prologue. This register must be released by means of
10763 release_scratch_register_on_entry once it is dead. */
10766 get_scratch_register_on_entry (struct scratch_reg *sr)
10774 /* We always use R11 in 64-bit mode. */
/* 32-bit mode: pick a register not used for argument passing, the
   static chain, or DRAP — the chain of else-ifs below walks the
   candidates in preference order (several regno assignments are on
   stripped lines).  */
10779 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10781 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10783 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10784 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10785 int regparm = ix86_function_regparm (fntype, decl);
10787 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10789 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10790 for the static chain register. */
10791 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10792 && drap_regno != AX_REG)
10794 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10795 for the static chain register. */
10796 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10798 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10800 /* ecx is the static chain register. */
10801 else if (regparm < 3 && !fastcall_p && !thiscall_p
10803 && drap_regno != CX_REG)
10805 else if (ix86_save_reg (BX_REG, true))
10807 /* esi is the static chain register. */
10808 else if (!(regparm == 3 && static_chain_p)
10809 && ix86_save_reg (SI_REG, true))
10811 else if (ix86_save_reg (DI_REG, true))
/* Last resort: a live caller-saved register that must be spilled
   around the use (push emitted below).  */
10815 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10820 sr->reg = gen_rtx_REG (Pmode, regno);
10823 rtx insn = emit_insn (gen_push (sr->reg));
10824 RTX_FRAME_RELATED_P (insn) = 1;
10828 /* Release a scratch register obtained from the preceding function. */
/* Pops the register back if get_scratch_register_on_entry had to spill
   it (the guard testing the saved flag is on a stripped line), and
   repairs both the unwind info and the sp_offset model.  */
10831 release_scratch_register_on_entry (struct scratch_reg *sr)
10835 struct machine_function *m = cfun->machine;
10836 rtx x, insn = emit_insn (gen_pop (sr->reg));
10838 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10839 RTX_FRAME_RELATED_P (insn) = 1;
10840 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10841 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10842 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10843 m->fs.sp_offset -= UNITS_PER_WORD;
10847 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10849 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10852 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10854 /* We skip the probe for the first interval + a small dope of 4 words and
10855 probe that many bytes past the specified size to maintain a protection
10856 area at the botton of the stack. */
10857 const int dope = 4 * UNITS_PER_WORD;
10858 rtx size_rtx = GEN_INT (size), last;
10860 /* See if we have a constant small number of probes to generate. If so,
10861 that's the easy case. The run-time loop is made up of 11 insns in the
10862 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10863 for n # of intervals. */
10864 if (size <= 5 * PROBE_INTERVAL)
10866 HOST_WIDE_INT i, adjust;
10867 bool first_probe = true;
10869 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10870 values of N from 1 until it exceeds SIZE. If only one probe is
10871 needed, this will not generate any code. Then adjust and probe
10872 to PROBE_INTERVAL + SIZE. */
10873 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10877 adjust = 2 * PROBE_INTERVAL + dope;
10878 first_probe = false;
10881 adjust = PROBE_INTERVAL;
10883 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10884 plus_constant (Pmode, stack_pointer_rtx,
10886 emit_stack_probe (stack_pointer_rtx);
10890 adjust = size + PROBE_INTERVAL + dope;
10892 adjust = size + PROBE_INTERVAL - i;
10894 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10895 plus_constant (Pmode, stack_pointer_rtx,
10897 emit_stack_probe (stack_pointer_rtx);
10899 /* Adjust back to account for the additional first interval. */
10900 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10901 plus_constant (Pmode, stack_pointer_rtx,
10902 PROBE_INTERVAL + dope)));
10905 /* Otherwise, do the same as above, but in a loop. Note that we must be
10906 extra careful with variables wrapping around because we might be at
10907 the very top (or the very bottom) of the address space and we have
10908 to be able to handle this case properly; in particular, we use an
10909 equality test for the loop condition. */
10912 HOST_WIDE_INT rounded_size;
10913 struct scratch_reg sr;
10915 get_scratch_register_on_entry (&sr);
10918 /* Step 1: round SIZE to the previous multiple of the interval. */
10920 rounded_size = size & -PROBE_INTERVAL;
10923 /* Step 2: compute initial and final value of the loop counter. */
10925 /* SP = SP_0 + PROBE_INTERVAL. */
10926 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10927 plus_constant (Pmode, stack_pointer_rtx,
10928 - (PROBE_INTERVAL + dope))));
10930 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10931 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10932 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10933 gen_rtx_PLUS (Pmode, sr.reg,
10934 stack_pointer_rtx)));
10937 /* Step 3: the loop
10939 while (SP != LAST_ADDR)
10941 SP = SP + PROBE_INTERVAL
10945 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10946 values of N from 1 until it is equal to ROUNDED_SIZE. */
10948 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10951 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10952 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10954 if (size != rounded_size)
10956 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10957 plus_constant (Pmode, stack_pointer_rtx,
10958 rounded_size - size)));
10959 emit_stack_probe (stack_pointer_rtx);
10962 /* Adjust back to account for the additional first interval. */
10963 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10964 plus_constant (Pmode, stack_pointer_rtx,
10965 PROBE_INTERVAL + dope)));
10967 release_scratch_register_on_entry (&sr);
10970 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10972 /* Even if the stack pointer isn't the CFA register, we need to correctly
10973 describe the adjustments made to it, in particular differentiate the
10974 frame-related ones from the frame-unrelated ones. */
10977 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10978 XVECEXP (expr, 0, 0)
10979 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10980 plus_constant (Pmode, stack_pointer_rtx, -size));
10981 XVECEXP (expr, 0, 1)
10982 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10983 plus_constant (Pmode, stack_pointer_rtx,
10984 PROBE_INTERVAL + dope + size));
10985 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10986 RTX_FRAME_RELATED_P (last) = 1;
10988 cfun->machine->fs.sp_offset += size;
10991 /* Make sure nothing is scheduled before we are done. */
10992 emit_insn (gen_blockage ());
10995 /* Adjust the stack pointer up to REG while probing it. */
/* Emits the textual assembly for the run-time probe loop referenced by
   ix86_adjust_stack_and_probe: repeatedly subtract PROBE_INTERVAL from SP
   and touch the new top-of-stack word until SP equals REG (LAST_ADDR).
   NOTE(review): this excerpt elides several lines of the original
   (return type, braces, the xops[] declaration and the REG operand
   assignment) — do not treat it as compilable as-is.  */
10998 output_adjust_stack_and_probe (rtx reg)
/* labelno is static so each emitted loop gets unique internal labels.  */
11000 static int labelno = 0;
11001 char loop_lab[32], end_lab[32];
11004 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11005 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11007 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11009 /* Jump to END_LAB if SP == LAST_ADDR. */
11010 xops[0] = stack_pointer_rtx;
11012 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11013 fputs ("\tje\t", asm_out_file);
11014 assemble_name_raw (asm_out_file, end_lab);
11015 fputc ('\n', asm_out_file);
11017 /* SP = SP + PROBE_INTERVAL. */
11018 xops[1] = GEN_INT (PROBE_INTERVAL);
11019 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
/* Probe: "or $0, (sp)" is a read-modify-write touch of the new
   top-of-stack word; it faults if a guard page has been reached.  */
11022 xops[1] = const0_rtx;
11023 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
11025 fprintf (asm_out_file, "\tjmp\t");
11026 assemble_name_raw (asm_out_file, loop_lab);
11027 fputc ('\n', asm_out_file);
11029 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11034 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
11035 inclusive. These are offsets from the current stack pointer. */
/* Unlike ix86_adjust_stack_and_probe, this variant probes addresses
   below SP without moving SP itself.  Two strategies: unrolled probes
   for small constant sizes, otherwise a scratch-register-driven loop.
   NOTE(review): several lines are elided in this excerpt (return type,
   braces, loop-variable declaration, some call arguments).  */
11038 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
11040 /* See if we have a constant small number of probes to generate. If so,
11041 that's the easy case. The run-time loop is made up of 7 insns in the
11042 generic case while the compile-time loop is made up of n insns for n #
11044 if (size <= 7 * PROBE_INTERVAL)
11048 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
11049 it exceeds SIZE. If only one probe is needed, this will not
11050 generate any code. Then probe at FIRST + SIZE. */
11051 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
11052 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11055 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11059 /* Otherwise, do the same as above, but in a loop. Note that we must be
11060 extra careful with variables wrapping around because we might be at
11061 the very top (or the very bottom) of the address space and we have
11062 to be able to handle this case properly; in particular, we use an
11063 equality test for the loop condition. */
11066 HOST_WIDE_INT rounded_size, last;
11067 struct scratch_reg sr;
11069 get_scratch_register_on_entry (&sr);
11072 /* Step 1: round SIZE to the previous multiple of the interval. */
/* PROBE_INTERVAL is a power of two, so &-PROBE_INTERVAL rounds down.  */
11074 rounded_size = size & -PROBE_INTERVAL;
11077 /* Step 2: compute initial and final value of the loop counter. */
11079 /* TEST_OFFSET = FIRST. */
/* Offsets are negated because the probes address below SP.  */
11080 emit_move_insn (sr.reg, GEN_INT (-first));
11082 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
11083 last = first + rounded_size;
11086 /* Step 3: the loop
11088 while (TEST_ADDR != LAST_ADDR)
11090 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
11094 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
11095 until it is equal to ROUNDED_SIZE. */
11097 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
11100 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
11101 that SIZE is equal to ROUNDED_SIZE. */
11103 if (size != rounded_size)
11104 emit_stack_probe (plus_constant (Pmode,
11105 gen_rtx_PLUS (Pmode,
11108 rounded_size - size));
11110 release_scratch_register_on_entry (&sr);
11113 /* Make sure nothing is scheduled before we are done. */
11114 emit_insn (gen_blockage ());
11117 /* Probe a range of stack addresses from REG to END, inclusive. These are
11118 offsets from the current stack pointer. */
/* Emits the textual assembly for the loop generated by
   ix86_emit_probe_stack_range: advance the test offset in REG by
   PROBE_INTERVAL and touch (SP + REG) until REG equals END.
   NOTE(review): return type, braces, the xops[] declaration and the
   REG/END operand assignments are elided in this excerpt.  */
11121 output_probe_stack_range (rtx reg, rtx end)
/* labelno is static so each emitted loop gets unique internal labels.  */
11123 static int labelno = 0;
11124 char loop_lab[32], end_lab[32];
11127 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11128 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11130 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11132 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
11135 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11136 fputs ("\tje\t", asm_out_file);
11137 assemble_name_raw (asm_out_file, end_lab);
11138 fputc ('\n', asm_out_file);
11140 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
11141 xops[1] = GEN_INT (PROBE_INTERVAL);
11142 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11144 /* Probe at TEST_ADDR. */
/* "or $0, (sp,reg)" touches the probed word without changing it;
   it faults if a guard page has been reached.  */
11145 xops[0] = stack_pointer_rtx;
11147 xops[2] = const0_rtx;
11148 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
11150 fprintf (asm_out_file, "\tjmp\t");
11151 assemble_name_raw (asm_out_file, loop_lab);
11152 fputc ('\n', asm_out_file);
11154 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11159 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
11160 to be generated in correct form. */
/* Runs after reload.  Decides once and for all whether dynamic stack
   realignment is required, records the decision in
   crtl->stack_realign_needed/finalized, and — when realignment turns
   out to be unnecessary — may also cancel frame_pointer_needed and
   drop the DRAP register.  NOTE(review): this excerpt elides lines
   (braces, part of the stack_realign condition, early return, the
   bb/insn declarations), so the control flow shown is incomplete.  */
11162 ix86_finalize_stack_realign_flags (void)
11164 /* Check if stack realign is really needed after reload, and
11165 stores result in cfun */
/* The effective incoming boundary is the larger of the parameter-area
   boundary and the target's incoming stack boundary.  */
11166 unsigned int incoming_stack_boundary
11167 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
11168 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
11169 unsigned int stack_realign = (incoming_stack_boundary
11171 ? crtl->max_used_stack_slot_alignment
11172 : crtl->stack_alignment_needed));
11174 if (crtl->stack_realign_finalized)
11176 /* After stack_realign_needed is finalized, we can no longer
11178 gcc_assert (crtl->stack_realign_needed == stack_realign);
11182 /* If the only reason for frame_pointer_needed is that we conservatively
11183 assumed stack realignment might be needed, but in the end nothing that
11184 needed the stack alignment had been spilled, clear frame_pointer_needed
11185 and say we don't need stack realignment. */
11187 && frame_pointer_needed
11189 && flag_omit_frame_pointer
11190 && crtl->sp_is_unchanging
11191 && !ix86_current_function_calls_tls_descriptor
11192 && !crtl->accesses_prior_frames
11193 && !cfun->calls_alloca
11194 && !crtl->calls_eh_return
11195 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
11196 && !ix86_frame_pointer_required ()
11197 && get_frame_size () == 0
11198 && ix86_nsaved_sseregs () == 0
11199 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
11201 HARD_REG_SET set_up_by_prologue, prologue_used;
/* Scan every insn: if any still requires a stack frame, keep the
   conservative realignment decision and finalize immediately.  */
11204 CLEAR_HARD_REG_SET (prologue_used);
11205 CLEAR_HARD_REG_SET (set_up_by_prologue);
11206 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
11207 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
11208 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
11209 HARD_FRAME_POINTER_REGNUM);
11210 FOR_EACH_BB_FN (bb, cfun)
11213 FOR_BB_INSNS (bb, insn)
11214 if (NONDEBUG_INSN_P (insn)
11215 && requires_stack_frame_p (insn, prologue_used,
11216 set_up_by_prologue))
11218 crtl->stack_realign_needed = stack_realign;
11219 crtl->stack_realign_finalized = true;
11224 /* If drap has been set, but it actually isn't live at the start
11225 of the function, there is no reason to set it up. */
11226 if (crtl->drap_reg)
11228 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11229 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
11231 crtl->drap_reg = NULL_RTX;
11232 crtl->need_drap = false;
11236 cfun->machine->no_drap_save_restore = true;
/* No insn needs a frame: cancel realignment and the frame pointer,
   lower the recorded alignment requirements, and re-run dataflow so
   regs_ever_live reflects the simpler prologue.  */
11238 frame_pointer_needed = false;
11239 stack_realign = false;
11240 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
11241 crtl->stack_alignment_needed = incoming_stack_boundary;
11242 crtl->stack_alignment_estimated = incoming_stack_boundary;
11243 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
11244 crtl->preferred_stack_boundary = incoming_stack_boundary;
11245 df_finish_pass (true);
11246 df_scan_alloc (NULL);
11248 df_compute_regs_ever_live (true);
11252 crtl->stack_realign_needed = stack_realign;
11253 crtl->stack_realign_finalized = true;
11256 /* Delete SET_GOT right after entry block if it is allocated to reg. */
/* Called from ix86_expand_prologue when it emits its own SET_GOT for
   mcount profiling: if the first real insn of the function is already
   a (parallel [(set REG (unspec ... UNSPEC_SET_GOT)) ...]) targeting
   the same hard register, delete it to avoid computing the GOT
   address twice.  NOTE(review): return type and braces are elided in
   this excerpt.  */
11259 ix86_elim_entry_set_got (rtx reg)
11261 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11262 rtx_insn *c_insn = BB_HEAD (bb);
/* BB_HEAD may be a label/note; step to the first real insn.  */
11263 if (!NONDEBUG_INSN_P (c_insn))
11264 c_insn = next_nonnote_nondebug_insn (c_insn);
11265 if (c_insn && NONJUMP_INSN_P (c_insn))
11267 rtx pat = PATTERN (c_insn);
11268 if (GET_CODE (pat) == PARALLEL)
/* Only the first element of the PARALLEL is inspected; the SET_GOT
   pattern places the (set reg (unspec)) there.  */
11270 rtx vec = XVECEXP (pat, 0, 0);
11271 if (GET_CODE (vec) == SET
11272 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
11273 && REGNO (XEXP (vec, 0)) == REGNO (reg))
11274 delete_insn (c_insn);
11279 /* Expand the prologue into a bunch of separate insns. */
/* Main prologue expander.  Orchestrates, in order: ms_hook / static
   chain special entry sequences, DRAP setup and realignment, frame
   pointer setup, integer/SSE register saves (push or mov), stack
   allocation with optional probing, and profiling/SEH bookkeeping.
   Frame-state tracking lives in cfun->machine->fs (m->fs) and must
   end up matching ix86_compute_frame_layout's view.
   NOTE(review): this excerpt elides many lines (declarations of insn/
   t/push/mov/r10, braces, else-arms, several conditions), so the
   visible control flow is incomplete — consult the full file before
   editing.  */
11282 ix86_expand_prologue (void)
11284 struct machine_function *m = cfun->machine;
11286 struct ix86_frame frame;
11287 HOST_WIDE_INT allocate;
11288 bool int_registers_saved;
11289 bool sse_registers_saved;
11291 ix86_finalize_stack_realign_flags ();
11293 /* DRAP should not coexist with stack_realign_fp */
11294 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
11296 memset (&m->fs, 0, sizeof (m->fs));
11298 /* Initialize CFA state for before the prologue. */
11299 m->fs.cfa_reg = stack_pointer_rtx;
11300 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
11302 /* Track SP offset to the CFA. We continue tracking this after we've
11303 swapped the CFA register away from SP. In the case of re-alignment
11304 this is fudged; we're interested to offsets within the local frame. */
11305 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11306 m->fs.sp_valid = true;
11308 ix86_compute_frame_layout (&frame);
11310 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
11312 /* We should have already generated an error for any use of
11313 ms_hook on a nested function. */
11314 gcc_checking_assert (!ix86_static_chain_on_stack)
11316 /* Check if profiling is active and we shall use profiling before
11317 prologue variant. If so sorry. */
11318 if (crtl->profile && flag_fentry != 0)
11319 sorry ("ms_hook_prologue attribute isn%'t compatible "
11320 "with -mfentry for 32-bit");
11322 /* In ix86_asm_output_function_label we emitted:
11323 8b ff movl.s %edi,%edi
11325 8b ec movl.s %esp,%ebp
11327 This matches the hookable function prologue in Win32 API
11328 functions in Microsoft Windows XP Service Pack 2 and newer.
11329 Wine uses this to enable Windows apps to hook the Win32 API
11330 functions provided by Wine.
11332 What that means is that we've already set up the frame pointer. */
11334 if (frame_pointer_needed
11335 && !(crtl->drap_reg && crtl->stack_realign_needed))
11339 /* We've decided to use the frame pointer already set up.
11340 Describe this to the unwinder by pretending that both
11341 push and mov insns happen right here.
11343 Putting the unwind info here at the end of the ms_hook
11344 is done so that we can make absolutely certain we get
11345 the required byte sequence at the start of the function,
11346 rather than relying on an assembler that can produce
11347 the exact encoding required.
11349 However it does mean (in the unpatched case) that we have
11350 a 1 insn window where the asynchronous unwind info is
11351 incorrect. However, if we placed the unwind info at
11352 its correct location we would have incorrect unwind info
11353 in the patched case. Which is probably all moot since
11354 I don't expect Wine generates dwarf2 unwind info for the
11355 system libraries that use this feature. */
11357 insn = emit_insn (gen_blockage ());
11359 push = gen_push (hard_frame_pointer_rtx);
11360 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
11361 stack_pointer_rtx);
11362 RTX_FRAME_RELATED_P (push) = 1;
11363 RTX_FRAME_RELATED_P (mov) = 1;
11365 RTX_FRAME_RELATED_P (insn) = 1;
11366 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11367 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
11369 /* Note that gen_push incremented m->fs.cfa_offset, even
11370 though we didn't emit the push insn here. */
11371 m->fs.cfa_reg = hard_frame_pointer_rtx;
11372 m->fs.fp_offset = m->fs.cfa_offset;
11373 m->fs.fp_valid = true;
11377 /* The frame pointer is not needed so pop %ebp again.
11378 This leaves us with a pristine state. */
11379 emit_insn (gen_pop (hard_frame_pointer_rtx));
11383 /* The first insn of a function that accepts its static chain on the
11384 stack is to push the register that would be filled in by a direct
11385 call. This insn will be skipped by the trampoline. */
11386 else if (ix86_static_chain_on_stack)
11388 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
11389 emit_insn (gen_blockage ());
11391 /* We don't want to interpret this push insn as a register save,
11392 only as a stack adjustment. The real copy of the register as
11393 a save will be done later, if needed. */
11394 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
11395 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
11396 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
11397 RTX_FRAME_RELATED_P (insn) = 1;
11400 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
11401 of DRAP is needed and stack realignment is really needed after reload */
11402 if (stack_realign_drap)
11404 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11406 /* Only need to push parameter pointer reg if it is caller saved. */
11407 if (!call_used_regs[REGNO (crtl->drap_reg)])
11409 /* Push arg pointer reg */
11410 insn = emit_insn (gen_push (crtl->drap_reg));
11411 RTX_FRAME_RELATED_P (insn) = 1;
11414 /* Grab the argument pointer. */
11415 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
11416 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11417 RTX_FRAME_RELATED_P (insn) = 1;
/* From here on the DRAP register, not SP, defines the CFA.  */
11418 m->fs.cfa_reg = crtl->drap_reg;
11419 m->fs.cfa_offset = 0;
11421 /* Align the stack. */
11422 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11424 GEN_INT (-align_bytes)));
11425 RTX_FRAME_RELATED_P (insn) = 1;
11427 /* Replicate the return address on the stack so that return
11428 address can be reached via (argp - 1) slot. This is needed
11429 to implement macro RETURN_ADDR_RTX and intrinsic function
11430 expand_builtin_return_addr etc. */
11431 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
11432 t = gen_frame_mem (word_mode, t);
11433 insn = emit_insn (gen_push (t));
11434 RTX_FRAME_RELATED_P (insn) = 1;
11436 /* For the purposes of frame and register save area addressing,
11437 we've started over with a new frame. */
11438 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11439 m->fs.realigned = true;
11442 int_registers_saved = (frame.nregs == 0);
11443 sse_registers_saved = (frame.nsseregs == 0);
11445 if (frame_pointer_needed && !m->fs.fp_valid)
11447 /* Note: AT&T enter does NOT have reversed args. Enter is probably
11448 slower on all targets. Also sdb doesn't like it. */
11449 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
11450 RTX_FRAME_RELATED_P (insn) = 1;
11452 /* Push registers now, before setting the frame pointer
11454 if (!int_registers_saved
11456 && !frame.save_regs_using_mov)
11458 ix86_emit_save_regs ();
11459 int_registers_saved = true;
11460 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11463 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11465 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11466 RTX_FRAME_RELATED_P (insn) = 1;
11468 if (m->fs.cfa_reg == stack_pointer_rtx)
11469 m->fs.cfa_reg = hard_frame_pointer_rtx;
11470 m->fs.fp_offset = m->fs.sp_offset;
11471 m->fs.fp_valid = true;
11475 if (!int_registers_saved)
11477 /* If saving registers via PUSH, do so now. */
11478 if (!frame.save_regs_using_mov)
11480 ix86_emit_save_regs ();
11481 int_registers_saved = true;
11482 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11485 /* When using red zone we may start register saving before allocating
11486 the stack frame saving one cycle of the prologue. However, avoid
11487 doing this if we have to probe the stack; at least on x86_64 the
11488 stack probe can turn into a call that clobbers a red zone location. */
11489 else if (ix86_using_red_zone ()
11490 && (! TARGET_STACK_PROBE
11491 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11493 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11494 int_registers_saved = true;
11498 if (stack_realign_fp)
11500 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11501 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11503 /* The computation of the size of the re-aligned stack frame means
11504 that we must allocate the size of the register save area before
11505 performing the actual alignment. Otherwise we cannot guarantee
11506 that there's enough storage above the realignment point. */
11507 if (m->fs.sp_offset != frame.sse_reg_save_offset)
11508 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11509 GEN_INT (m->fs.sp_offset
11510 - frame.sse_reg_save_offset),
11513 /* Align the stack. */
11514 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11516 GEN_INT (-align_bytes)));
11518 /* For the purposes of register save area addressing, the stack
11519 pointer is no longer valid. As for the value of sp_offset,
11520 see ix86_compute_frame_layout, which we need to match in order
11521 to pass verification of stack_pointer_offset at the end. */
11522 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
11523 m->fs.sp_valid = false;
/* Remaining bytes to carve out of the stack after the saves above.  */
11526 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11528 if (flag_stack_usage_info)
11530 /* We start to count from ARG_POINTER. */
11531 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11533 /* If it was realigned, take into account the fake frame. */
11534 if (stack_realign_drap)
11536 if (ix86_static_chain_on_stack)
11537 stack_size += UNITS_PER_WORD;
11539 if (!call_used_regs[REGNO (crtl->drap_reg)])
11540 stack_size += UNITS_PER_WORD;
11542 /* This over-estimates by 1 minimal-stack-alignment-unit but
11543 mitigates that by counting in the new return address slot. */
11544 current_function_dynamic_stack_size
11545 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11548 current_function_static_stack_size = stack_size;
11551 /* On SEH target with very large frame size, allocate an area to save
11552 SSE registers (as the very large allocation won't be described). */
11554 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11555 && !sse_registers_saved)
11557 HOST_WIDE_INT sse_size =
11558 frame.sse_reg_save_offset - frame.reg_save_offset;
11560 gcc_assert (int_registers_saved);
11562 /* No need to do stack checking as the area will be immediately
11564 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11565 GEN_INT (-sse_size), -1,
11566 m->fs.cfa_reg == stack_pointer_rtx);
11567 allocate -= sse_size;
11568 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11569 sse_registers_saved = true;
11572 /* The stack has already been decremented by the instruction calling us
11573 so probe if the size is non-negative to preserve the protection area. */
11574 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11576 /* We expect the registers to be saved when probes are used. */
11577 gcc_assert (int_registers_saved);
11579 if (STACK_CHECK_MOVING_SP)
11581 if (!(crtl->is_leaf && !cfun->calls_alloca
11582 && allocate <= PROBE_INTERVAL))
11584 ix86_adjust_stack_and_probe (allocate);
11590 HOST_WIDE_INT size = allocate;
/* Cap the probed size on 64-bit so the loop bound fits in 32 bits.  */
11592 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11593 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11595 if (TARGET_STACK_PROBE)
11597 if (crtl->is_leaf && !cfun->calls_alloca)
11599 if (size > PROBE_INTERVAL)
11600 ix86_emit_probe_stack_range (0, size);
11603 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11607 if (crtl->is_leaf && !cfun->calls_alloca)
11609 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11610 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11611 size - STACK_CHECK_PROTECT);
11614 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11621 else if (!ix86_target_stack_probe ()
11622 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11624 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11625 GEN_INT (-allocate), -1,
11626 m->fs.cfa_reg == stack_pointer_rtx);
/* Large-allocation path: call the stack-allocation worker (e.g.
   ___chkstk) with the size in EAX, preserving any live EAX/R10.  */
11630 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11632 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11633 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11634 bool eax_live = ix86_eax_live_at_start_p ();
11635 bool r10_live = false;
11638 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11642 insn = emit_insn (gen_push (eax));
11643 allocate -= UNITS_PER_WORD;
11644 /* Note that SEH directives need to continue tracking the stack
11645 pointer even after the frame pointer has been set up. */
11646 if (sp_is_cfa_reg || TARGET_SEH)
11649 m->fs.cfa_offset += UNITS_PER_WORD;
11650 RTX_FRAME_RELATED_P (insn) = 1;
11651 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11652 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11653 plus_constant (Pmode, stack_pointer_rtx,
11654 -UNITS_PER_WORD)));
11660 r10 = gen_rtx_REG (Pmode, R10_REG);
11661 insn = emit_insn (gen_push (r10));
11662 allocate -= UNITS_PER_WORD;
11663 if (sp_is_cfa_reg || TARGET_SEH)
11666 m->fs.cfa_offset += UNITS_PER_WORD;
11667 RTX_FRAME_RELATED_P (insn) = 1;
11668 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11669 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11670 plus_constant (Pmode, stack_pointer_rtx,
11671 -UNITS_PER_WORD)));
11675 emit_move_insn (eax, GEN_INT (allocate));
11676 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11678 /* Use the fact that AX still contains ALLOCATE. */
11679 adjust_stack_insn = (Pmode == DImode
11680 ? gen_pro_epilogue_adjust_stack_di_sub
11681 : gen_pro_epilogue_adjust_stack_si_sub);
11683 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11684 stack_pointer_rtx, eax));
11686 if (sp_is_cfa_reg || TARGET_SEH)
11689 m->fs.cfa_offset += allocate;
11690 RTX_FRAME_RELATED_P (insn) = 1;
11691 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11692 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11693 plus_constant (Pmode, stack_pointer_rtx,
11696 m->fs.sp_offset += allocate;
11698 /* Use stack_pointer_rtx for relative addressing so that code
11699 works for realigned stack, too. */
11700 if (r10_live && eax_live)
11702 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11703 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11704 gen_frame_mem (word_mode, t));
11705 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11706 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11707 gen_frame_mem (word_mode, t));
11709 else if (eax_live || r10_live)
11711 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11712 emit_move_insn (gen_rtx_REG (word_mode,
11713 (eax_live ? AX_REG : R10_REG)),
11714 gen_frame_mem (word_mode, t));
11717 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11719 /* If we haven't already set up the frame pointer, do so now. */
11720 if (frame_pointer_needed && !m->fs.fp_valid)
11722 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11723 GEN_INT (frame.stack_pointer_offset
11724 - frame.hard_frame_pointer_offset));
11725 insn = emit_insn (insn);
11726 RTX_FRAME_RELATED_P (insn) = 1;
11727 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11729 if (m->fs.cfa_reg == stack_pointer_rtx)
11730 m->fs.cfa_reg = hard_frame_pointer_rtx;
11731 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11732 m->fs.fp_valid = true;
11735 if (!int_registers_saved)
11736 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11737 if (!sse_registers_saved)
11738 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11740 /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
11742 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
11744 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
11745 insn = emit_insn (gen_set_got (pic));
11746 RTX_FRAME_RELATED_P (insn) = 1;
11747 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11748 emit_insn (gen_prologue_use (pic));
11749 /* Deleting already emitted SET_GOT if exist and allocated to
11750 REAL_PIC_OFFSET_TABLE_REGNUM. */
11751 ix86_elim_entry_set_got (pic);
11754 if (crtl->drap_reg && !crtl->stack_realign_needed)
11756 /* vDRAP is setup but after reload it turns out stack realign
11757 isn't necessary, here we will emit prologue to setup DRAP
11758 without stack realign adjustment */
11759 t = choose_baseaddr (0);
11760 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11763 /* Prevent instructions from being scheduled into register save push
11764 sequence when access to the redzone area is done through frame pointer.
11765 The offset between the frame pointer and the stack pointer is calculated
11766 relative to the value of the stack pointer at the end of the function
11767 prologue, and moving instructions that access redzone area via frame
11768 pointer inside push sequence violates this assumption. */
11769 if (frame_pointer_needed && frame.red_zone_size)
11770 emit_insn (gen_memory_blockage ());
11772 /* Emit cld instruction if stringops are used in the function. */
11773 if (TARGET_CLD && ix86_current_function_needs_cld)
11774 emit_insn (gen_cld ());
11776 /* SEH requires that the prologue end within 256 bytes of the start of
11777 the function. Prevent instruction schedules that would extend that.
11778 Further, prevent alloca modifications to the stack pointer from being
11779 combined with prologue modifications. */
11781 emit_insn (gen_prologue_use (stack_pointer_rtx));
11784 /* Emit code to restore REG using a POP insn. */
/* Pops REG off the stack and keeps the unwinder's view of the CFA
   consistent: the pop both restores the register and raises SP by one
   word, so the matching REG_CFA_* notes are attached here.
   NOTE(review): braces, the return type and an else-arm are elided in
   this excerpt.  */
11787 ix86_emit_restore_reg_using_pop (rtx reg)
11789 struct machine_function *m = cfun->machine;
11790 rtx insn = emit_insn (gen_pop (reg));
11792 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
/* The pop raised SP by one word; mirror that in the tracked offset.  */
11793 m->fs.sp_offset -= UNITS_PER_WORD;
11795 if (m->fs.cfa_reg == crtl->drap_reg
11796 && REGNO (reg) == REGNO (crtl->drap_reg))
11798 /* Previously we'd represented the CFA as an expression
11799 like *(%ebp - 8). We've just popped that value from
11800 the stack, which means we need to reset the CFA to
11801 the drap register. This will remain until we restore
11802 the stack pointer. */
11803 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11804 RTX_FRAME_RELATED_P (insn) = 1;
11806 /* This means that the DRAP register is valid for addressing too. */
11807 m->fs.drap_valid = true;
11811 if (m->fs.cfa_reg == stack_pointer_rtx)
11813 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11814 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11815 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11816 RTX_FRAME_RELATED_P (insn) = 1;
11818 m->fs.cfa_offset -= UNITS_PER_WORD;
11821 /* When the frame pointer is the CFA, and we pop it, we are
11822 swapping back to the stack pointer as the CFA. This happens
11823 for stack frames that don't allocate other data, so we assume
11824 the stack pointer is now pointing at the return address, i.e.
11825 the function entry state, which makes the offset be 1 word. */
11826 if (reg == hard_frame_pointer_rtx)
11828 m->fs.fp_valid = false;
11829 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11831 m->fs.cfa_reg = stack_pointer_rtx;
11832 m->fs.cfa_offset -= UNITS_PER_WORD;
11834 add_reg_note (insn, REG_CFA_DEF_CFA,
11835 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11836 GEN_INT (m->fs.cfa_offset)));
11837 RTX_FRAME_RELATED_P (insn) = 1;
11842 /* Emit code to restore saved registers using POP insns. */
/* Walks all hard registers and pops every saved general-purpose one
   (SSE registers are excluded; they are restored by MOV elsewhere).
   NOTE(review): return type and braces are elided in this excerpt.  */
11845 ix86_emit_restore_regs_using_pop (void)
11847 unsigned int regno;
11849 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11850 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11851 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11854 /* Emit code and notes for the LEAVE instruction. */
/* LEAVE restores SP from the frame pointer and pops the saved frame
   pointer in one insn; update m->fs accordingly and, when the frame
   pointer was the CFA, hand the CFA back to SP with the matching
   unwind note.  NOTE(review): return type, braces and trailing call
   arguments are elided in this excerpt.  */
11857 ix86_emit_leave (void)
11859 struct machine_function *m = cfun->machine;
11860 rtx insn = emit_insn (ix86_gen_leave ());
11862 ix86_add_queued_cfa_restore_notes (insn);
/* LEAVE requires a valid frame pointer; afterwards SP is valid and
   points one word below where the frame pointer was saved.  */
11864 gcc_assert (m->fs.fp_valid);
11865 m->fs.sp_valid = true;
11866 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11867 m->fs.fp_valid = false;
11869 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11871 m->fs.cfa_reg = stack_pointer_rtx;
11872 m->fs.cfa_offset = m->fs.sp_offset;
11874 add_reg_note (insn, REG_CFA_DEF_CFA,
11875 plus_constant (Pmode, stack_pointer_rtx,
11877 RTX_FRAME_RELATED_P (insn) = 1;
11879 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11883 /* Emit code to restore saved registers using MOV insns.
11884 First register is restored from CFA - CFA_OFFSET. */
/* MOV-based restore of the saved general-purpose registers; used when
   POP cannot be (e.g. SP does not point at the save area).  CFA_OFFSET
   walks downward by one word per restored register.
   NOTE(review): return type, braces and the mem/insn declarations are
   elided in this excerpt.  */
11886 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11887 bool maybe_eh_return)
11889 struct machine_function *m = cfun->machine;
11890 unsigned int regno;
11892 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11893 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11895 rtx reg = gen_rtx_REG (word_mode, regno);
/* choose_baseaddr picks SP/FP/DRAP-relative addressing as available.  */
11898 mem = choose_baseaddr (cfa_offset);
11899 mem = gen_frame_mem (word_mode, mem);
11900 insn = emit_move_insn (reg, mem);
11902 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
/* NOTE(review): comment below says "popped" but this path restores via
   MOV; the CFA-reset reasoning still applies because the DRAP value
   has been reloaded from its stack slot.  */
11904 /* Previously we'd represented the CFA as an expression
11905 like *(%ebp - 8). We've just popped that value from
11906 the stack, which means we need to reset the CFA to
11907 the drap register. This will remain until we restore
11908 the stack pointer. */
11909 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11910 RTX_FRAME_RELATED_P (insn) = 1;
11912 /* This means that the DRAP register is valid for addressing. */
11913 m->fs.drap_valid = true;
11916 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11918 cfa_offset -= UNITS_PER_WORD;
11922 /* Emit code to restore saved SSE registers using MOV insns.
11923 First register is restored from CFA - CFA_OFFSET.
   Counterpart of ix86_emit_restore_regs_using_mov for the SSE
   registers, restored as V4SFmode with 128-bit-aligned loads.  */
11925 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11926 bool maybe_eh_return)
11928 unsigned int regno;
11930 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11931 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11933 rtx reg = gen_rtx_REG (V4SFmode, regno);
11936 mem = choose_baseaddr (cfa_offset);
11937 mem = gen_rtx_MEM (V4SFmode, mem);
/* The SSE save area is kept 16-byte aligned, so an aligned load is safe.  */
11938 set_mem_align (mem, 128);
11939 emit_move_insn (reg, mem);
11941 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11947 /* Restore function stack, frame, and registers.
   STYLE selects the epilogue variant; style == 2 appears to denote the
   eh_return path (cf. the "style != 2" / "style == 2" tests below) —
   NOTE(review): confirm the exact STYLE encoding against the callers,
   which are outside this extract.  This extract also elides a number of
   original lines (braces, else arms, argument continuations); all code
   below is kept verbatim.  */
11950 ix86_expand_epilogue (int style)
11952 struct machine_function *m = cfun->machine;
/* Snapshot the frame state so it can be reset for a subsequent epilogue.  */
11953 struct machine_frame_state frame_state_save = m->fs;
11954 struct ix86_frame frame;
11955 bool restore_regs_via_mov;
11958 ix86_finalize_stack_realign_flags ();
11959 ix86_compute_frame_layout (&frame);
11961 m->fs.sp_valid = (!frame_pointer_needed
11962 || (crtl->sp_is_unchanging
11963 && !stack_realign_fp));
11964 gcc_assert (!m->fs.sp_valid
11965 || m->fs.sp_offset == frame.stack_pointer_offset);
11967 /* The FP must be valid if the frame pointer is present. */
11968 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11969 gcc_assert (!m->fs.fp_valid
11970 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11972 /* We must have *some* valid pointer to the stack frame. */
11973 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11975 /* The DRAP is never valid at this point. */
11976 gcc_assert (!m->fs.drap_valid);
11978 /* See the comment about red zone and frame
11979 pointer usage in ix86_expand_prologue. */
11980 if (frame_pointer_needed && frame.red_zone_size)
11981 emit_insn (gen_memory_blockage ());
11983 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11984 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11986 /* Determine the CFA offset of the end of the red-zone. */
11987 m->fs.red_zone_offset = 0;
11988 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11990 /* The red-zone begins below the return address. */
11991 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11993 /* When the register save area is in the aligned portion of
11994 the stack, determine the maximum runtime displacement that
11995 matches up with the aligned frame. */
11996 if (stack_realign_drap)
11997 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
12001 /* Special care must be taken for the normal return case of a function
12002 using eh_return: the eax and edx registers are marked as saved, but
12003 not restored along this path. Adjust the save location to match. */
12004 if (crtl->calls_eh_return && style != 2)
12005 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
/* Decide between restoring registers with MOV loads or with POPs.  */
12007 /* EH_RETURN requires the use of moves to function properly. */
12008 if (crtl->calls_eh_return)
12009 restore_regs_via_mov = true;
12010 /* SEH requires the use of pops to identify the epilogue. */
12011 else if (TARGET_SEH)
12012 restore_regs_via_mov = false;
12013 /* If we're only restoring one register and sp is not valid then
12014 using a move instruction to restore the register since it's
12015 less work than reloading sp and popping the register. */
12016 else if (!m->fs.sp_valid && frame.nregs <= 1)
12017 restore_regs_via_mov = true;
12018 else if (TARGET_EPILOGUE_USING_MOVE
12019 && cfun->machine->use_fast_prologue_epilogue
12020 && (frame.nregs > 1
12021 || m->fs.sp_offset != frame.reg_save_offset))
12022 restore_regs_via_mov = true;
12023 else if (frame_pointer_needed
12025 && m->fs.sp_offset != frame.reg_save_offset)
12026 restore_regs_via_mov = true;
12027 else if (frame_pointer_needed
12028 && TARGET_USE_LEAVE
12029 && cfun->machine->use_fast_prologue_epilogue
12030 && frame.nregs == 1)
12031 restore_regs_via_mov = true;
12033 restore_regs_via_mov = false;
12035 if (restore_regs_via_mov || frame.nsseregs)
12037 /* Ensure that the entire register save area is addressable via
12038 the stack pointer, if we will restore via sp. */
12040 && m->fs.sp_offset > 0x7fffffff
12041 && !(m->fs.fp_valid || m->fs.drap_valid)
12042 && (frame.nsseregs + frame.nregs) != 0)
12044 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12045 GEN_INT (m->fs.sp_offset
12046 - frame.sse_reg_save_offset),
12048 m->fs.cfa_reg == stack_pointer_rtx);
12052 /* If there are any SSE registers to restore, then we have to do it
12053 via moves, since there's obviously no pop for SSE regs. */
12054 if (frame.nsseregs)
12055 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
12058 if (restore_regs_via_mov)
12063 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
12065 /* eh_return epilogues need %ecx added to the stack pointer. */
12068 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
12070 /* Stack align doesn't work with eh_return. */
12071 gcc_assert (!stack_realign_drap);
12072 /* Neither does regparm nested functions. */
12073 gcc_assert (!ix86_static_chain_on_stack);
12075 if (frame_pointer_needed)
12077 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
12078 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
12079 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
12081 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
12082 insn = emit_move_insn (hard_frame_pointer_rtx, t);
12084 /* Note that we use SA as a temporary CFA, as the return
12085 address is at the proper place relative to it. We
12086 pretend this happens at the FP restore insn because
12087 prior to this insn the FP would be stored at the wrong
12088 offset relative to SA, and after this insn we have no
12089 other reasonable register to use for the CFA. We don't
12090 bother resetting the CFA to the SP for the duration of
12091 the return insn. */
12092 add_reg_note (insn, REG_CFA_DEF_CFA,
12093 plus_constant (Pmode, sa, UNITS_PER_WORD));
12094 ix86_add_queued_cfa_restore_notes (insn);
12095 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
12096 RTX_FRAME_RELATED_P (insn) = 1;
12098 m->fs.cfa_reg = sa;
12099 m->fs.cfa_offset = UNITS_PER_WORD;
12100 m->fs.fp_valid = false;
12102 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
12103 const0_rtx, style, false);
12107 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
12108 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
12109 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
12110 ix86_add_queued_cfa_restore_notes (insn);
12112 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
12113 if (m->fs.cfa_offset != UNITS_PER_WORD)
12115 m->fs.cfa_offset = UNITS_PER_WORD;
12116 add_reg_note (insn, REG_CFA_DEF_CFA,
12117 plus_constant (Pmode, stack_pointer_rtx,
12119 RTX_FRAME_RELATED_P (insn) = 1;
12122 m->fs.sp_offset = UNITS_PER_WORD;
12123 m->fs.sp_valid = true;
12128 /* SEH requires that the function end with (1) a stack adjustment
12129 if necessary, (2) a sequence of pops, and (3) a return or
12130 jump instruction. Prevent insns from the function body from
12131 being scheduled into this sequence. */
12134 /* Prevent a catch region from being adjacent to the standard
12135 epilogue sequence. Unfortunately crtl->uses_eh_lsda nor
12136 several other flags that would be interesting to test are
12138 if (flag_non_call_exceptions)
12139 emit_insn (gen_nops (const1_rtx));
12141 emit_insn (gen_blockage ());
12144 /* First step is to deallocate the stack frame so that we can
12145 pop the registers. Also do it on SEH target for very large
12146 frame as the emitted instructions aren't allowed by the ABI in
12148 if (!m->fs.sp_valid
12150 && (m->fs.sp_offset - frame.reg_save_offset
12151 >= SEH_MAX_FRAME_SIZE)))
12153 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
12154 GEN_INT (m->fs.fp_offset
12155 - frame.reg_save_offset),
12158 else if (m->fs.sp_offset != frame.reg_save_offset)
12160 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12161 GEN_INT (m->fs.sp_offset
12162 - frame.reg_save_offset),
12164 m->fs.cfa_reg == stack_pointer_rtx);
12167 ix86_emit_restore_regs_using_pop ();
12170 /* If we used a stack pointer and haven't already got rid of it,
12172 if (m->fs.fp_valid)
12174 /* If the stack pointer is valid and pointing at the frame
12175 pointer store address, then we only need a pop. */
12176 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
12177 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12178 /* Leave results in shorter dependency chains on CPUs that are
12179 able to grok it fast. */
12180 else if (TARGET_USE_LEAVE
12181 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
12182 || !cfun->machine->use_fast_prologue_epilogue)
12183 ix86_emit_leave ();
12186 pro_epilogue_adjust_stack (stack_pointer_rtx,
12187 hard_frame_pointer_rtx,
12188 const0_rtx, style, !using_drap);
12189 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
/* DRAP teardown: recover the stack pointer from the DRAP register,
   accounting for the saved return address, the static chain (if on the
   stack), and the saved DRAP register itself.  */
12195 int param_ptr_offset = UNITS_PER_WORD;
12198 gcc_assert (stack_realign_drap);
12200 if (ix86_static_chain_on_stack)
12201 param_ptr_offset += UNITS_PER_WORD;
12202 if (!call_used_regs[REGNO (crtl->drap_reg)])
12203 param_ptr_offset += UNITS_PER_WORD;
12205 insn = emit_insn (gen_rtx_SET
12206 (VOIDmode, stack_pointer_rtx,
12207 gen_rtx_PLUS (Pmode,
12209 GEN_INT (-param_ptr_offset))));
12210 m->fs.cfa_reg = stack_pointer_rtx;
12211 m->fs.cfa_offset = param_ptr_offset;
12212 m->fs.sp_offset = param_ptr_offset;
12213 m->fs.realigned = false;
12215 add_reg_note (insn, REG_CFA_DEF_CFA,
12216 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12217 GEN_INT (param_ptr_offset)));
12218 RTX_FRAME_RELATED_P (insn) = 1;
12220 if (!call_used_regs[REGNO (crtl->drap_reg)])
12221 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
12224 /* At this point the stack pointer must be valid, and we must have
12225 restored all of the registers. We may not have deallocated the
12226 entire stack frame. We've delayed this until now because it may
12227 be possible to merge the local stack deallocation with the
12228 deallocation forced by ix86_static_chain_on_stack. */
12229 gcc_assert (m->fs.sp_valid);
12230 gcc_assert (!m->fs.fp_valid);
12231 gcc_assert (!m->fs.realigned);
12232 if (m->fs.sp_offset != UNITS_PER_WORD)
12234 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12235 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
12239 ix86_add_queued_cfa_restore_notes (get_last_insn ());
12241 /* Sibcall epilogues don't want a return instruction. */
12244 m->fs = frame_state_save;
/* Emit the return.  A stdcall/pascal-style callee pops its own
   arguments via "ret n"; beyond 64K that encoding doesn't exist, so pop
   the return address into %ecx, adjust, and jump through %ecx.  */
12248 if (crtl->args.pops_args && crtl->args.size)
12250 rtx popc = GEN_INT (crtl->args.pops_args);
12252 /* i386 can only pop 64K bytes. If asked to pop more, pop return
12253 address, do explicit add, and jump indirectly to the caller. */
12255 if (crtl->args.pops_args >= 65536)
12257 rtx ecx = gen_rtx_REG (SImode, CX_REG);
12260 /* There is no "pascal" calling convention in any 64bit ABI. */
12261 gcc_assert (!TARGET_64BIT);
12263 insn = emit_insn (gen_pop (ecx));
12264 m->fs.cfa_offset -= UNITS_PER_WORD;
12265 m->fs.sp_offset -= UNITS_PER_WORD;
12267 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12268 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
12269 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
/* Record that %ecx now holds the return address for the unwinder.  */
12270 add_reg_note (insn, REG_CFA_REGISTER,
12271 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
12272 RTX_FRAME_RELATED_P (insn) = 1;
12274 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12276 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
12279 emit_jump_insn (gen_simple_return_pop_internal (popc));
12282 emit_jump_insn (gen_simple_return_internal ());
12284 /* Restore the state back to the state from the prologue,
12285 so that it's correct for the next epilogue. */
12286 m->fs = frame_state_save;
12289 /* Reset from the function's potential modifications.
   TARGET_ASM_FUNCTION_EPILOGUE hook: restores the PIC register number
   and, for Mach-O, pads the end of the function so no label falls at
   the very end of the object.  */
12292 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
12294 if (pic_offset_table_rtx
12295 && !ix86_use_pseudo_pic_reg ())
12296 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
12298 /* Mach-O doesn't support labels at the end of objects, so if
12299 it looks like we might want one, insert a NOP. */
12301 rtx_insn *insn = get_last_insn ();
12302 rtx_insn *deleted_debug_label = NULL;
/* Scan backwards past trailing notes, remembering any deleted debug
   label encountered along the way.  */
12305 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
12307 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
12308 notes only, instead set their CODE_LABEL_NUMBER to -1,
12309 otherwise there would be code generation differences
12310 in between -g and -g0. */
12311 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12312 deleted_debug_label = insn;
12313 insn = PREV_INSN (insn);
12318 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
12319 fputs ("\tnop\n", file);
12320 else if (deleted_debug_label)
/* No nop needed: neutralize the deleted debug labels instead.  */
12321 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
12322 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12323 CODE_LABEL_NUMBER (insn) = -1;
12329 /* Return a scratch register to use in the split stack prologue. The
12330 split stack prologue is used for -fsplit-stack. It is the first
12331 instructions in the function, even before the regular prologue.
12332 The scratch register can be any caller-saved register which is not
12333 used for parameters or for the static chain.
   Returns INVALID_REGNUM (after issuing a "sorry" diagnostic) when no
   such register exists for the function's calling convention.
   NOTE(review): the regno values actually returned on the success paths
   are on lines elided from this extract.  */
12335 static unsigned int
12336 split_stack_prologue_scratch_regno (void)
12342 bool is_fastcall, is_thiscall;
12345 is_fastcall = (lookup_attribute ("fastcall",
12346 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12348 is_thiscall = (lookup_attribute ("thiscall",
12349 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12351 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
/* fastcall uses %ecx/%edx for arguments; with a static chain as well
   there is no caller-saved register left.  */
12355 if (DECL_STATIC_CHAIN (cfun->decl))
12357 sorry ("-fsplit-stack does not support fastcall with "
12358 "nested function");
12359 return INVALID_REGNUM;
12363 else if (is_thiscall)
12365 if (!DECL_STATIC_CHAIN (cfun->decl))
12369 else if (regparm < 3)
12371 if (!DECL_STATIC_CHAIN (cfun->decl))
12377 sorry ("-fsplit-stack does not support 2 register "
12378 "parameters for a nested function");
12379 return INVALID_REGNUM;
12386 /* FIXME: We could make this work by pushing a register
12387 around the addition and comparison. */
12388 sorry ("-fsplit-stack does not support 3 register parameters");
12389 return INVALID_REGNUM;
12394 /* A SYMBOL_REF for the function which allocates new stackspace for
   -fsplit-stack (__morestack); lazily created and GC-rooted via GTY.  */
12397 static GTY(()) rtx split_stack_fn;
12399 /* A SYMBOL_REF for the more stack function when using the large
   code model (__morestack_large_model); also lazily created.  */
12402 static GTY(()) rtx split_stack_fn_large;
12404 /* Handle -fsplit-stack. These are the first instructions in the
12405 function, even before the regular prologue.
   Emits the stack-limit check against the TCB, the conditional call to
   __morestack (or __morestack_large_model), and the varargs pointer
   setup.  NOTE(review): this extract elides a number of original lines
   (some braces, else arms and declarations); code kept verbatim.  */
12408 ix86_expand_split_stack_prologue (void)
12410 struct ix86_frame frame;
12411 HOST_WIDE_INT allocate;
12412 unsigned HOST_WIDE_INT args_size;
12413 rtx_code_label *label;
12414 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
12415 rtx scratch_reg = NULL_RTX;
12416 rtx_code_label *varargs_label = NULL;
12419 gcc_assert (flag_split_stack && reload_completed);
12421 ix86_finalize_stack_realign_flags ();
12422 ix86_compute_frame_layout (&frame);
12423 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
12425 /* This is the label we will branch to if we have enough stack
12426 space. We expect the basic block reordering pass to reverse this
12427 branch if optimizing, so that we branch in the unlikely case. */
12428 label = gen_label_rtx ();
12430 /* We need to compare the stack pointer minus the frame size with
12431 the stack boundary in the TCB. The stack boundary always gives
12432 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
12433 can compare directly. Otherwise we need to do an addition. */
12435 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12436 UNSPEC_STACK_CHECK);
12437 limit = gen_rtx_CONST (Pmode, limit);
12438 limit = gen_rtx_MEM (Pmode, limit);
12439 if (allocate < SPLIT_STACK_AVAILABLE)
12440 current = stack_pointer_rtx;
12443 unsigned int scratch_regno;
12446 /* We need a scratch register to hold the stack pointer minus
12447 the required frame size. Since this is the very start of the
12448 function, the scratch register can be any caller-saved
12449 register which is not used for parameters. */
12450 offset = GEN_INT (- allocate);
12451 scratch_regno = split_stack_prologue_scratch_regno ();
12452 if (scratch_regno == INVALID_REGNUM)
12454 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12455 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
12457 /* We don't use ix86_gen_add3 in this case because it will
12458 want to split to lea, but when not optimizing the insn
12459 will not be split after this point. */
12460 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12461 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
/* Offset doesn't fit an immediate: materialize it first, then add.  */
12466 emit_move_insn (scratch_reg, offset);
12467 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
12468 stack_pointer_rtx));
12470 current = scratch_reg;
12473 ix86_expand_branch (GEU, current, limit, label);
12474 jump_insn = get_last_insn ();
12475 JUMP_LABEL (jump_insn) = label;
12477 /* Mark the jump as very likely to be taken. */
12478 add_int_reg_note (jump_insn, REG_BR_PROB,
12479 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
12481 if (split_stack_fn == NULL_RTX)
12483 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12484 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
12486 fn = split_stack_fn;
12488 /* Get more stack space. We pass in the desired stack space and the
12489 size of the arguments to copy to the new stack. In 32-bit mode
12490 we push the parameters; __morestack will return on a new stack
12491 anyhow. In 64-bit mode we pass the parameters in r10 and
12493 allocate_rtx = GEN_INT (allocate);
12494 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12495 call_fusage = NULL_RTX;
12500 reg10 = gen_rtx_REG (Pmode, R10_REG);
12501 reg11 = gen_rtx_REG (Pmode, R11_REG);
12503 /* If this function uses a static chain, it will be in %r10.
12504 Preserve it across the call to __morestack. */
12505 if (DECL_STATIC_CHAIN (cfun->decl))
12509 rax = gen_rtx_REG (word_mode, AX_REG);
12510 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12511 use_reg (&call_fusage, rax);
12514 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12517 HOST_WIDE_INT argval;
12519 gcc_assert (Pmode == DImode);
12520 /* When using the large model we need to load the address
12521 into a register, and we've run out of registers. So we
12522 switch to a different calling convention, and we call a
12523 different function: __morestack_large. We pass the
12524 argument size in the upper 32 bits of r10 and pass the
12525 frame size in the lower 32 bits. */
12526 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12527 gcc_assert ((args_size & 0xffffffff) == args_size);
12529 if (split_stack_fn_large == NULL_RTX)
12531 split_stack_fn_large =
12532 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12533 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
12535 if (ix86_cmodel == CM_LARGE_PIC)
12537 rtx_code_label *label;
/* Large PIC: compute the GOT base from %rip at LABEL, then load the
   callee address from its GOT slot into %r11.  */
12540 label = gen_label_rtx ();
12541 emit_label (label);
12542 LABEL_PRESERVE_P (label) = 1;
12543 emit_insn (gen_set_rip_rex64 (reg10, label));
12544 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12545 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12546 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12548 x = gen_rtx_CONST (Pmode, x);
12549 emit_move_insn (reg11, x);
12550 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12551 x = gen_const_mem (Pmode, x);
12552 emit_move_insn (reg11, x);
12555 emit_move_insn (reg11, split_stack_fn_large);
/* Pack args_size into the high 32 bits and allocate into the low 32
   bits of %r10.  The double shift avoids UB when HOST_WIDE_INT shifts
   by its full width.  */
12559 argval = ((args_size << 16) << 16) + allocate;
12560 emit_move_insn (reg10, GEN_INT (argval));
12564 emit_move_insn (reg10, allocate_rtx);
12565 emit_move_insn (reg11, GEN_INT (args_size));
12566 use_reg (&call_fusage, reg11);
12569 use_reg (&call_fusage, reg10);
/* 32-bit path: pass both arguments on the stack.  */
12573 emit_insn (gen_push (GEN_INT (args_size)));
12574 emit_insn (gen_push (allocate_rtx));
12576 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12577 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12579 add_function_usage_to (call_insn, call_fusage);
12581 /* In order to make call/return prediction work right, we now need
12582 to execute a return instruction. See
12583 libgcc/config/i386/morestack.S for the details on how this works.
12585 For flow purposes gcc must not see this as a return
12586 instruction--we need control flow to continue at the subsequent
12587 label. Therefore, we use an unspec. */
12588 gcc_assert (crtl->args.pops_args < 65536);
12589 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12591 /* If we are in 64-bit mode and this function uses a static chain,
12592 we saved %r10 in %rax before calling _morestack. */
12593 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12594 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12595 gen_rtx_REG (word_mode, AX_REG));
12597 /* If this function calls va_start, we need to store a pointer to
12598 the arguments on the old stack, because they may not have been
12599 all copied to the new stack. At this point the old stack can be
12600 found at the frame pointer value used by __morestack, because
12601 __morestack has set that up before calling back to us. Here we
12602 store that pointer in a scratch register, and in
12603 ix86_expand_prologue we store the scratch register in a stack
12605 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12607 unsigned int scratch_regno;
12611 scratch_regno = split_stack_prologue_scratch_regno ();
12612 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12613 frame_reg = gen_rtx_REG (Pmode, BP_REG);
/* Layout above the saved frame pointer (64-bit case):  */
12617 return address within this function
12618 return address of caller of this function
12620 So we add three words to get to the stack arguments.
/* Layout above the saved frame pointer (32-bit case):  */
12624 return address within this function
12625 first argument to __morestack
12626 second argument to __morestack
12627 return address of caller of this function
12629 So we add five words to get to the stack arguments.
12631 words = TARGET_64BIT ? 3 : 5;
12632 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12633 gen_rtx_PLUS (Pmode, frame_reg,
12634 GEN_INT (words * UNITS_PER_WORD))));
12636 varargs_label = gen_label_rtx ();
12637 emit_jump_insn (gen_jump (varargs_label));
12638 JUMP_LABEL (get_last_insn ()) = varargs_label;
/* Fast path: enough stack available, no __morestack call.  */
12643 emit_label (label);
12644 LABEL_NUSES (label) = 1;
12646 /* If this function calls va_start, we now have to set the scratch
12647 register for the case where we do not call __morestack. In this
12648 case we need to set it based on the stack pointer. */
12649 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12651 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12652 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12653 GEN_INT (UNITS_PER_WORD))));
12655 emit_label (varargs_label);
12656 LABEL_NUSES (varargs_label) = 1;
12660 /* We may have to tell the dataflow pass that the split stack prologue
12661 is initializing a scratch register.
   TARGET_EXTRA_LIVE_ON_ENTRY hook: marks the split-stack scratch
   register live on function entry in REGS when va_start is involved.  */
12664 ix86_live_on_entry (bitmap regs)
12666 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12668 gcc_assert (flag_split_stack);
12669 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12673 /* Extract the parts of an RTL expression that is a valid memory address
12674 for an instruction. Return 0 if the structure of the address is
12675 grossly off. Return -1 if the address contains ASHIFT, so it is not
12676 strictly valid, but still used for computing length of lea instruction.
   On success fills *OUT with base, index, displacement, scale and
   segment.  NOTE(review): this extract elides many original lines
   (failure returns, case labels, some assignments); code kept verbatim.  */
12679 ix86_decompose_address (rtx addr, struct ix86_address *out)
12681 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12682 rtx base_reg, index_reg;
12683 HOST_WIDE_INT scale = 1;
12684 rtx scale_rtx = NULL_RTX;
12687 enum ix86_address_seg seg = SEG_DEFAULT;
12689 /* Allow zero-extended SImode addresses,
12690 they will be emitted with addr32 prefix. */
12691 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12693 if (GET_CODE (addr) == ZERO_EXTEND
12694 && GET_MODE (XEXP (addr, 0)) == SImode)
12696 addr = XEXP (addr, 0);
12697 if (CONST_INT_P (addr))
12700 else if (GET_CODE (addr) == AND
12701 && const_32bit_mask (XEXP (addr, 1), DImode))
12703 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12704 if (addr == NULL_RTX)
12707 if (CONST_INT_P (addr))
12712 /* Allow SImode subregs of DImode addresses,
12713 they will be emitted with addr32 prefix. */
12714 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12716 if (GET_CODE (addr) == SUBREG
12717 && GET_MODE (SUBREG_REG (addr)) == DImode)
12719 addr = SUBREG_REG (addr);
12720 if (CONST_INT_P (addr))
12727 else if (GET_CODE (addr) == SUBREG)
12729 if (REG_P (SUBREG_REG (addr)))
/* PLUS: flatten the addend chain into ADDENDS and classify each
   operand as index*scale, shifted index, segment unspec, etc.  */
12734 else if (GET_CODE (addr) == PLUS)
12736 rtx addends[4], op;
12744 addends[n++] = XEXP (op, 1);
12747 while (GET_CODE (op) == PLUS);
12752 for (i = n; i >= 0; --i)
12755 switch (GET_CODE (op))
12760 index = XEXP (op, 0);
12761 scale_rtx = XEXP (op, 1);
12767 index = XEXP (op, 0);
12768 tmp = XEXP (op, 1);
12769 if (!CONST_INT_P (tmp))
12771 scale = INTVAL (tmp);
/* A shift count above 3 cannot be encoded as a scale (max 8).  */
12772 if ((unsigned HOST_WIDE_INT) scale > 3)
12774 scale = 1 << scale;
12779 if (GET_CODE (op) != UNSPEC)
12784 if (XINT (op, 1) == UNSPEC_TP
12785 && TARGET_TLS_DIRECT_SEG_REFS
12786 && seg == SEG_DEFAULT)
12787 seg = DEFAULT_TLS_SEG_REG;
12793 if (!REG_P (SUBREG_REG (op)))
12820 else if (GET_CODE (addr) == MULT)
12822 index = XEXP (addr, 0); /* index*scale */
12823 scale_rtx = XEXP (addr, 1);
12825 else if (GET_CODE (addr) == ASHIFT)
12827 /* We're called for lea too, which implements ashift on occasion. */
12828 index = XEXP (addr, 0);
12829 tmp = XEXP (addr, 1);
12830 if (!CONST_INT_P (tmp))
12832 scale = INTVAL (tmp);
12833 if ((unsigned HOST_WIDE_INT) scale > 3)
12835 scale = 1 << scale;
12839 disp = addr; /* displacement */
12845 else if (GET_CODE (index) == SUBREG
12846 && REG_P (SUBREG_REG (index)))
12852 /* Extract the integral value of scale. */
12855 if (!CONST_INT_P (scale_rtx))
12857 scale = INTVAL (scale_rtx);
/* Look through SUBREGs so the special-casing below can compare against
   the underlying hard registers.  */
12860 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12861 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12863 /* Avoid useless 0 displacement. */
12864 if (disp == const0_rtx && (base || index))
12867 /* Allow arg pointer and stack pointer as index if there is not scaling. */
12868 if (base_reg && index_reg && scale == 1
12869 && (index_reg == arg_pointer_rtx
12870 || index_reg == frame_pointer_rtx
12871 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12873 std::swap (base, index);
12874 std::swap (base_reg, index_reg);
12877 /* Special case: %ebp cannot be encoded as a base without a displacement.
12881 && (base_reg == hard_frame_pointer_rtx
12882 || base_reg == frame_pointer_rtx
12883 || base_reg == arg_pointer_rtx
12884 || (REG_P (base_reg)
12885 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12886 || REGNO (base_reg) == R13_REG))))
12889 /* Special case: on K6, [%esi] makes the instruction vector decoded.
12890 Avoid this by transforming to [%esi+0].
12891 Reload calls address legitimization without cfun defined, so we need
12892 to test cfun for being non-NULL. */
12893 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12894 && base_reg && !index_reg && !disp
12895 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12898 /* Special case: encode reg+reg instead of reg*2. */
12899 if (!base && index && scale == 2)
12900 base = index, base_reg = index_reg, scale = 1;
12902 /* Special case: scaling cannot be encoded without base or displacement. */
12903 if (!base && !disp && index && scale != 1)
12907 out->index = index;
12909 out->scale = scale;
12915 /* Return cost of the memory address x.
12916 For i386, it is better to use a complex address than let gcc copy
12917 the address into a reg and make a new pseudo. But not if the address
12918 requires two regs - that would mean more pseudos with longer
   lifetimes.  TARGET_ADDRESS_COST hook; the mode/addr-space/speed
   parameters are unused.  NOTE(review): the concrete cost values
   returned lie on lines elided from this extract.  */
12921 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
12923 struct ix86_address parts;
12925 int ok = ix86_decompose_address (x, &parts);
/* Normalize SUBREG base/index to the underlying register.  */
12929 if (parts.base && GET_CODE (parts.base) == SUBREG)
12930 parts.base = SUBREG_REG (parts.base);
12931 if (parts.index && GET_CODE (parts.index) == SUBREG)
12932 parts.index = SUBREG_REG (parts.index);
12934 /* Attempt to minimize number of registers in the address. */
12936 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12938 && (!REG_P (parts.index)
12939 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12942 /* When address base or index is "pic_offset_table_rtx" we don't increase
12943 address cost. When a memopt with "pic_offset_table_rtx" is not invariant
12944 itself it most likely means that base or index is not invariant.
12945 Therefore only "pic_offset_table_rtx" could be hoisted out, which is not
12946 profitable for x86. */
12948 && (current_pass->type == GIMPLE_PASS
12949 || (!pic_offset_table_rtx
12950 || REGNO (pic_offset_table_rtx) != REGNO(parts.base)))
12951 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12953 && (current_pass->type == GIMPLE_PASS
12954 || (!pic_offset_table_rtx
12955 || REGNO (pic_offset_table_rtx) != REGNO(parts.index)))
12956 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12957 && parts.base != parts.index)
12960 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
12961 since its predecode logic can't detect the length of instructions
12962 and it degenerates to vector decoded. Increase cost of such
12963 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
12964 to split such addresses or even refuse such addresses at all.
12966 Following addressing modes are affected:
12971 The first and last case may be avoidable by explicitly coding the zero in
12972 memory address, but I don't have AMD-K6 machine handy to check this
12976 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12977 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12978 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12984 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12985 this is used to form addresses to local data when -fPIC is in
   effect.  Returns true iff DISP is the Mach-O pic-offset unspec.  */
12989 darwin_local_data_pic (rtx disp)
12991 return (GET_CODE (disp) == UNSPEC
12992 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12995 /* Determine if a given RTX is a valid constant. We already know this
12996 satisfies CONSTANT_P.
   TARGET_LEGITIMATE_CONSTANT_P hook (the mode parameter is unnamed).
   NOTE(review): several case labels and return statements fall on lines
   elided from this extract; code kept verbatim.  */
12999 ix86_legitimate_constant_p (machine_mode, rtx x)
13001 /* Pointer bounds constants are not valid. */
13002 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
13005 switch (GET_CODE (x))
/* CONST: strip an outer (plus X (const_int N)) wrapper first.  */
13010 if (GET_CODE (x) == PLUS)
13012 if (!CONST_INT_P (XEXP (x, 1)))
13017 if (TARGET_MACHO && darwin_local_data_pic (x))
13020 /* Only some unspecs are valid as "constants". */
13021 if (GET_CODE (x) == UNSPEC)
13022 switch (XINT (x, 1))
13025 case UNSPEC_GOTOFF:
13026 case UNSPEC_PLTOFF:
13027 return TARGET_64BIT;
13029 case UNSPEC_NTPOFF:
13030 x = XVECEXP (x, 0, 0);
13031 return (GET_CODE (x) == SYMBOL_REF
13032 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13033 case UNSPEC_DTPOFF:
13034 x = XVECEXP (x, 0, 0);
13035 return (GET_CODE (x) == SYMBOL_REF
13036 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
13041 /* We must have drilled down to a symbol. */
13042 if (GET_CODE (x) == LABEL_REF)
13044 if (GET_CODE (x) != SYMBOL_REF)
13049 /* TLS symbols are never valid. */
13050 if (SYMBOL_REF_TLS_MODEL (x))
13053 /* DLLIMPORT symbols are never valid. */
13054 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13055 && SYMBOL_REF_DLLIMPORT_P (x))
13059 /* mdynamic-no-pic */
13060 if (MACHO_DYNAMIC_NO_PIC_P)
13061 return machopic_symbol_defined_p (x);
13066 if (GET_MODE (x) == TImode
13067 && x != CONST0_RTX (TImode)
13073 if (!standard_sse_constant_p (x))
13080 /* Otherwise we handle everything else in the move patterns. */
13084 /* Determine if it's legal to put X into the constant pool. This
13085 is not possible for the address of thread-local symbols, which
13086 is checked above.  Anything that is not a legitimate constant
   cannot be forced into memory either.  */
13089 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
13091 /* We can always put integral constants and vectors in memory. */
13092 switch (GET_CODE (x))
13102 return !ix86_legitimate_constant_p (mode, x);
13105 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
   i.e. it must be accessed through an import/refptr indirection.
   Only meaningful when TARGET_DLLIMPORT_DECL_ATTRIBUTES is set and X
   is a SYMBOL_REF; otherwise the answer is false.  */
13109 is_imported_p (rtx x)
13111 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
13112 || GET_CODE (x) != SYMBOL_REF
13115 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
13119 /* Nonzero if the constant value X is a legitimate general operand
13120 when generating PIC code. It is given that flag_pic is on and
13121 that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
13124 legitimate_pic_operand_p (rtx x)
13128 switch (GET_CODE (x))
      /* Look through CONST and an optional constant offset.  */
13131 inner = XEXP (x, 0);
13132 if (GET_CODE (inner) == PLUS
13133 && CONST_INT_P (XEXP (inner, 1)))
13134 inner = XEXP (inner, 0);
13136 /* Only some unspecs are valid as "constants". */
13137 if (GET_CODE (inner) == UNSPEC)
13138 switch (XINT (inner, 1))
13141 case UNSPEC_GOTOFF:
13142 case UNSPEC_PLTOFF:
13143 return TARGET_64BIT;
13145 x = XVECEXP (inner, 0, 0);
13146 return (GET_CODE (x) == SYMBOL_REF
13147 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13148 case UNSPEC_MACHOPIC_OFFSET:
13149 return legitimate_pic_address_disp_p (x);
      /* Default: defer to the displacement validity check.  */
13157 return legitimate_pic_address_disp_p (x);
13164 /* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  DISP may be a symbol, label, or a GOT/TLS unspec,
   optionally with a small constant offset.  */
13168 legitimate_pic_address_disp_p (rtx disp)
13172 /* In 64bit mode we can allow direct addresses of symbols and labels
13173 when they are not dynamic symbols. */
13176 rtx op0 = disp, op1;
13178 switch (GET_CODE (disp))
13184 if (GET_CODE (XEXP (disp, 0)) != PLUS)
13186 op0 = XEXP (XEXP (disp, 0), 0);
13187 op1 = XEXP (XEXP (disp, 0), 1);
      /* Offsets must stay within +/-16MB so RIP-relative addressing
         with additional displacement still reaches.  */
13188 if (!CONST_INT_P (op1)
13189 || INTVAL (op1) >= 16*1024*1024
13190 || INTVAL (op1) < -16*1024*1024)
13192 if (GET_CODE (op0) == LABEL_REF)
13194 if (GET_CODE (op0) == CONST
13195 && GET_CODE (XEXP (op0, 0)) == UNSPEC
13196 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
13198 if (GET_CODE (op0) == UNSPEC
13199 && XINT (op0, 1) == UNSPEC_PCREL)
13201 if (GET_CODE (op0) != SYMBOL_REF)
13206 /* TLS references should always be enclosed in UNSPEC.
13207 The dllimported symbol always needs to be resolved. */
13208 if (SYMBOL_REF_TLS_MODEL (op0)
13209 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
13214 if (is_imported_p (op0))
13217 if (SYMBOL_REF_FAR_ADDR_P (op0)
13218 || !SYMBOL_REF_LOCAL_P (op0))
13221 /* Function-symbols need to be resolved only for
13223 For the small-model we don't need to resolve anything
13225 if ((ix86_cmodel != CM_LARGE_PIC
13226 && SYMBOL_REF_FUNCTION_P (op0))
13227 || ix86_cmodel == CM_SMALL_PIC)
13229 /* Non-external symbols don't need to be resolved for
13230 the large and medium models. */
13231 if ((ix86_cmodel == CM_LARGE_PIC
13232 || ix86_cmodel == CM_MEDIUM_PIC)
13233 && !SYMBOL_REF_EXTERNAL_P (op0))
13236 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
13237 && (SYMBOL_REF_LOCAL_P (op0)
13238 || (HAVE_LD_PIE_COPYRELOC
13240 && !SYMBOL_REF_WEAK (op0)
13241 && !SYMBOL_REF_FUNCTION_P (op0)))
13242 && ix86_cmodel != CM_LARGE_PIC)
13250 if (GET_CODE (disp) != CONST)
13252 disp = XEXP (disp, 0);
      /* 64-bit path: only a bare GOT-style unspec wrapping a symbol or
         label is acceptable here.  */
13256 /* It is unsafe to allow PLUS expressions; this would limit the
13257 allowed distance of GOT tables. We should not need these anyway. */
13258 if (GET_CODE (disp) != UNSPEC
13259 || (XINT (disp, 1) != UNSPEC_GOTPCREL
13260 && XINT (disp, 1) != UNSPEC_GOTOFF
13261 && XINT (disp, 1) != UNSPEC_PCREL
13262 && XINT (disp, 1) != UNSPEC_PLTOFF))
13265 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
13266 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
      /* 32-bit path: strip optional constant offset, then require a
         recognized unspec.  */
13272 if (GET_CODE (disp) == PLUS)
13274 if (!CONST_INT_P (XEXP (disp, 1)))
13276 disp = XEXP (disp, 0);
13280 if (TARGET_MACHO && darwin_local_data_pic (disp))
13283 if (GET_CODE (disp) != UNSPEC)
13286 switch (XINT (disp, 1))
13291 /* We need to check for both symbols and labels because VxWorks loads
13292 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
13294 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13295 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
13296 case UNSPEC_GOTOFF:
13297 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
13298 While the ABI also specifies a 32bit relocation, we don't produce
13299 it in the small PIC model at all. */
13300 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13301 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
13303 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
13305 case UNSPEC_GOTTPOFF:
13306 case UNSPEC_GOTNTPOFF:
13307 case UNSPEC_INDNTPOFF:
      /* TLS unspecs: valid only when the wrapped symbol carries the
         matching TLS model.  */
13310 disp = XVECEXP (disp, 0, 0);
13311 return (GET_CODE (disp) == SYMBOL_REF
13312 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
13313 case UNSPEC_NTPOFF:
13314 disp = XVECEXP (disp, 0, 0);
13315 return (GET_CODE (disp) == SYMBOL_REF
13316 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
13317 case UNSPEC_DTPOFF:
13318 disp = XVECEXP (disp, 0, 0);
13319 return (GET_CODE (disp) == SYMBOL_REF
13320 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
13326 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
13327 replace the input X, or the original X if no replacement is called for.
13328 The output parameter *WIN is 1 if the calling macro should goto WIN,
13329 0 if it should not. */
13332 ix86_legitimize_reload_address (rtx x, machine_mode, int opnum, int type,
13335 /* Reload can generate:
13337 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
13341 This RTX is rejected from ix86_legitimate_address_p due to
13342 non-strictness of base register 97. Following this rejection,
13343 reload pushes all three components into separate registers,
13344 creating invalid memory address RTX.
13346 Following code reloads only the invalid part of the
13347 memory address RTX. */
13349 if (GET_CODE (x) == PLUS
13350 && REG_P (XEXP (x, 1))
13351 && GET_CODE (XEXP (x, 0)) == PLUS
13352 && REG_P (XEXP (XEXP (x, 0), 1)))
13355 bool something_reloaded = false;
      /* Reload the inner base register if it fails the strict check.  */
13357 base = XEXP (XEXP (x, 0), 1);
13358 if (!REG_OK_FOR_BASE_STRICT_P (base))
13360 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
13361 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13362 opnum, (enum reload_type) type);
13363 something_reloaded = true;
      /* Likewise for the outer index register.  */
13366 index = XEXP (x, 1);
13367 if (!REG_OK_FOR_INDEX_STRICT_P (index))
13369 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
13370 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13371 opnum, (enum reload_type) type);
13372 something_reloaded = true;
      /* We only reach here because the address was invalid, so at least
         one component must have needed a reload.  */
13375 gcc_assert (something_reloaded);
13382 /* Determine if op is suitable RTX for an address register.
13383 Return naked register if a register or a register subreg is
13384 found, otherwise return NULL_RTX. */
13387 ix86_validate_address_register (rtx op)
13389 machine_mode mode = GET_MODE (op);
13391 /* Only SImode or DImode registers can form the address. */
13392 if (mode != SImode && mode != DImode)
13397 else if (GET_CODE (op) == SUBREG)
13399 rtx reg = SUBREG_REG (op);
13404 mode = GET_MODE (reg);
13406 /* Don't allow SUBREGs that span more than a word. It can
13407 lead to spill failures when the register is one word out
13408 of a two word structure. */
13409 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
13412 /* Allow only SUBREGs of non-eliminable hard registers. */
13413 if (register_no_elim_operand (reg, mode))
13417 /* Op is not a register. */
13421 /* Recognizes RTL expressions that are valid memory addresses for an
13422 instruction. The MODE argument is the machine mode for the MEM
13423 expression that wants to use this address.
13425 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
13426 convert common non-canonical forms to canonical form so that they will
   be recognized.  STRICT selects between the strict and non-strict
   register checks.  */
13430 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
13432 struct ix86_address parts;
13433 rtx base, index, disp;
13434 HOST_WIDE_INT scale;
13435 enum ix86_address_seg seg;
13437 if (ix86_decompose_address (addr, &parts) <= 0)
13438 /* Decomposition failed. */
13442 index = parts.index;
13444 scale = parts.scale;
13447 /* Validate base register. */
13450 rtx reg = ix86_validate_address_register (base);
13452 if (reg == NULL_RTX)
13455 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
13456 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
13457 /* Base is not valid. */
13461 /* Validate index register. */
13464 rtx reg = ix86_validate_address_register (index);
13466 if (reg == NULL_RTX)
13469 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
13470 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
13471 /* Index is not valid. */
13475 /* Index and base should have the same mode. */
13477 && GET_MODE (base) != GET_MODE (index))
13480 /* Address override works only on the (%reg) part of %fs:(%reg). */
13481 if (seg != SEG_DEFAULT
13482 && ((base && GET_MODE (base) != word_mode)
13483 || (index && GET_MODE (index) != word_mode)))
13486 /* Validate scale factor. */
13490 /* Scale without index. */
13493 if (scale != 2 && scale != 4 && scale != 8)
13494 /* Scale is not a valid multiplier. */
13498 /* Validate displacement. */
13501 if (GET_CODE (disp) == CONST
13502 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13503 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13504 switch (XINT (XEXP (disp, 0), 1))
13506 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
13507 used. While ABI specify also 32bit relocations, we don't produce
13508 them at all and use IP relative instead. */
13510 case UNSPEC_GOTOFF:
13511 gcc_assert (flag_pic);
13513 goto is_legitimate_pic;
13515 /* 64bit address unspec. */
13518 case UNSPEC_GOTPCREL:
13520 gcc_assert (flag_pic);
13521 goto is_legitimate_pic;
13523 case UNSPEC_GOTTPOFF:
13524 case UNSPEC_GOTNTPOFF:
13525 case UNSPEC_INDNTPOFF:
13526 case UNSPEC_NTPOFF:
13527 case UNSPEC_DTPOFF:
13530 case UNSPEC_STACK_CHECK:
13531 gcc_assert (flag_split_stack);
13535 /* Invalid address unspec. */
13539 else if (SYMBOLIC_CONST (disp)
13543 && MACHOPIC_INDIRECT
13544 && !machopic_operand_p (disp)
      /* PIC displacement checks.  */
13550 if (TARGET_64BIT && (index || base))
13552 /* foo@dtpoff(%rX) is ok. */
13553 if (GET_CODE (disp) != CONST
13554 || GET_CODE (XEXP (disp, 0)) != PLUS
13555 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13556 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13557 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13558 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13559 /* Non-constant pic memory reference. */
13562 else if ((!TARGET_MACHO || flag_pic)
13563 && ! legitimate_pic_address_disp_p (disp))
13564 /* Displacement is an invalid pic construct. */
13567 else if (MACHO_DYNAMIC_NO_PIC_P
13568 && !ix86_legitimate_constant_p (Pmode, disp))
13569 /* displacement must be referenced via non_lazy_pointer */
13573 /* This code used to verify that a symbolic pic displacement
13574 includes the pic_offset_table_rtx register.
13576 While this is good idea, unfortunately these constructs may
13577 be created by "adds using lea" optimization for incorrect
13586 This code is nonsensical, but results in addressing
13587 GOT table with pic_offset_table_rtx base. We can't
13588 just refuse it easily, since it gets matched by
13589 "addsi3" pattern, that later gets split to lea in the
13590 case output register differs from input. While this
13591 can be handled by separate addsi pattern for this case
13592 that never results in lea, this seems to be easier and
13593 correct fix for crash to disable this test. */
13595 else if (GET_CODE (disp) != LABEL_REF
13596 && !CONST_INT_P (disp)
13597 && (GET_CODE (disp) != CONST
13598 || !ix86_legitimate_constant_p (Pmode, disp))
13599 && (GET_CODE (disp) != SYMBOL_REF
13600 || !ix86_legitimate_constant_p (Pmode, disp)))
13601 /* Displacement is not constant. */
13603 else if (TARGET_64BIT
13604 && !x86_64_immediate_operand (disp, VOIDmode))
13605 /* Displacement is out of range. */
13607 /* In x32 mode, constant addresses are sign extended to 64bit, so
13608 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13609 else if (TARGET_X32 && !(index || base)
13610 && CONST_INT_P (disp)
13611 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13615 /* Everything looks valid. */
13619 /* Determine if a given RTX is a valid constant address:
   it must be CONSTANT_P and pass the strict legitimate-address check.  */
13622 constant_address_p (rtx x)
13624 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13627 /* Return a unique alias set for the GOT.  Lazily created on first
   use and cached in a function-local static.  */
13629 static alias_set_type
13630 ix86_GOT_alias_set (void)
13632 static alias_set_type set = -1;
13634 set = new_alias_set ();
13638 /* Set regs_ever_live for PIC base address register
13639 to true if required.  Only done while reload is in progress.  */
13641 set_pic_reg_ever_live ()
13643 if (reload_in_progress)
13644 df_set_regs_ever_live (REGNO (pic_offset_table_rtx), true);
13647 /* Return a legitimate reference for ORIG (an address) using the
13648 register REG. If REG is 0, a new pseudo is generated.
13650 There are two types of references that must be handled:
13652 1. Global data references must load the address from the GOT, via
13653 the PIC reg. An insn is emitted to do this load, and the reg is
13656 2. Static data references, constant pool addresses, and code labels
13657 compute the address as an offset from the GOT, whose base is in
13658 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13659 differentiate them from global data objects. The returned
13660 address is the PIC reg + an unspec constant.
13662 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13663 reg also appears in the address. */
13666 legitimize_pic_address (rtx orig, rtx reg)
13669 rtx new_rtx = orig;
13672 if (TARGET_MACHO && !TARGET_64BIT)
13675 reg = gen_reg_rtx (Pmode);
13676 /* Use the generic Mach-O PIC machinery. */
13677 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13681 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13683 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13688 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13690 else if (TARGET_64BIT && !TARGET_PECOFF
13691 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13694 /* This symbol may be referenced via a displacement from the PIC
13695 base address (@GOTOFF). */
13697 set_pic_reg_ever_live ();
13698 if (GET_CODE (addr) == CONST)
13699 addr = XEXP (addr, 0);
13700 if (GET_CODE (addr) == PLUS)
13702 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13704 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13707 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13708 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13710 tmpreg = gen_reg_rtx (Pmode);
13713 emit_move_insn (tmpreg, new_rtx);
13717 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13718 tmpreg, 1, OPTAB_DIRECT);
13722 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13724 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13726 /* This symbol may be referenced via a displacement from the PIC
13727 base address (@GOTOFF). */
13729 set_pic_reg_ever_live ();
13730 if (GET_CODE (addr) == CONST)
13731 addr = XEXP (addr, 0);
13732 if (GET_CODE (addr) == PLUS)
13734 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13736 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13739 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13740 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13741 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13745 emit_move_insn (reg, new_rtx);
13749 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13750 /* We can't use @GOTOFF for text labels on VxWorks;
13751 see gotoff_operand. */
13752 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13754 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13758 /* For x64 PE-COFF there is no GOT table. So we use address
   directly (RIP-relative, UNSPEC_PCREL).  */
13760 if (TARGET_64BIT && TARGET_PECOFF)
13762 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13763 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13766 reg = gen_reg_rtx (Pmode);
13767 emit_move_insn (reg, new_rtx);
13770 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13772 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13773 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13774 new_rtx = gen_const_mem (Pmode, new_rtx);
13775 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13778 reg = gen_reg_rtx (Pmode);
13779 /* Use directly gen_movsi, otherwise the address is loaded
13780 into register for CSE. We don't want to CSE this addresses,
13781 instead we CSE addresses from the GOT table, so skip this. */
13782 emit_insn (gen_movsi (reg, new_rtx));
13787 /* This symbol must be referenced via a load from the
13788 Global Offset Table (@GOT). */
13790 set_pic_reg_ever_live ();
13791 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13792 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13794 new_rtx = force_reg (Pmode, new_rtx);
13795 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13796 new_rtx = gen_const_mem (Pmode, new_rtx);
13797 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13800 reg = gen_reg_rtx (Pmode);
13801 emit_move_insn (reg, new_rtx);
      /* Non-symbolic operands: constants and CONST expressions.  */
13807 if (CONST_INT_P (addr)
13808 && !x86_64_immediate_operand (addr, VOIDmode))
13812 emit_move_insn (reg, addr);
13816 new_rtx = force_reg (Pmode, addr);
13818 else if (GET_CODE (addr) == CONST)
13820 addr = XEXP (addr, 0);
13822 /* We must match stuff we generate before. Assume the only
13823 unspecs that can get here are ours. Not that we could do
13824 anything with them anyway.... */
13825 if (GET_CODE (addr) == UNSPEC
13826 || (GET_CODE (addr) == PLUS
13827 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13829 gcc_assert (GET_CODE (addr) == PLUS);
13831 if (GET_CODE (addr) == PLUS)
13833 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13835 /* Check first to see if this is a constant offset from a @GOTOFF
13836 symbol reference. */
13837 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13838 && CONST_INT_P (op1))
13842 set_pic_reg_ever_live ();
13843 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13845 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13846 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13847 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13851 emit_move_insn (reg, new_rtx);
      /* Large offsets (outside +/-16MB) cannot be folded into the
         relocation; materialize them separately.  */
13857 if (INTVAL (op1) < -16*1024*1024
13858 || INTVAL (op1) >= 16*1024*1024)
13860 if (!x86_64_immediate_operand (op1, Pmode))
13861 op1 = force_reg (Pmode, op1);
13862 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
      /* General PLUS: legitimize both halves recursively and
         recombine.  */
13868 rtx base = legitimize_pic_address (op0, reg);
13869 machine_mode mode = GET_MODE (base);
13871 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13873 if (CONST_INT_P (new_rtx))
13875 if (INTVAL (new_rtx) < -16*1024*1024
13876 || INTVAL (new_rtx) >= 16*1024*1024)
13878 if (!x86_64_immediate_operand (new_rtx, mode))
13879 new_rtx = force_reg (mode, new_rtx);
13881 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13884 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13888 if (GET_CODE (new_rtx) == PLUS
13889 && CONSTANT_P (XEXP (new_rtx, 1)))
13891 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13892 new_rtx = XEXP (new_rtx, 1);
13894 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13902 /* Load the thread pointer. If TO_REG is true, force it into a register.
   The UNSPEC_TP is created in ptr_mode and zero-extended to TP_MODE
   when they differ (only SImode -> DImode, i.e. x32).  */
13905 get_thread_pointer (machine_mode tp_mode, bool to_reg)
13907 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13909 if (GET_MODE (tp) != tp_mode)
13911 gcc_assert (GET_MODE (tp) == SImode);
13912 gcc_assert (tp_mode == DImode);
13914 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13918 tp = copy_to_mode_reg (tp_mode, tp);
13923 /* Construct the SYMBOL_REF for the tls_get_addr function.
   Cached in a GTY static; for the large PIC model the symbol is
   wrapped in a PLTOFF-style pic_offset_table_rtx expression.  */
13925 static GTY(()) rtx ix86_tls_symbol;
13928 ix86_tls_get_addr (void)
13930 if (!ix86_tls_symbol)
      /* GNU TLS on 32-bit uses the triple-underscore entry point.  */
13933 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13934 ? "___tls_get_addr" : "__tls_get_addr");
13936 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13939 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13941 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13943 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13944 gen_rtx_CONST (Pmode, unspec));
13947 return ix86_tls_symbol;
13950 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.
   Cached in a GTY static; marked with the global-dynamic TLS model.  */
13952 static GTY(()) rtx ix86_tls_module_base_symbol;
13955 ix86_tls_module_base (void)
13957 if (!ix86_tls_module_base_symbol)
13959 ix86_tls_module_base_symbol
13960 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13962 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13963 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13966 return ix86_tls_module_base_symbol;
13969 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13970 false if we expect this to be used for a memory address and true if
13971 we expect to load the address into a register.  X is the TLS symbol,
   MODEL selects the TLS access model to generate code for.  */
13974 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13976 rtx dest, base, off;
13977 rtx pic = NULL_RTX, tp = NULL_RTX;
13978 machine_mode tp_mode = Pmode;
13981 /* Fall back to global dynamic model if tool chain cannot support local
13983 if (TARGET_SUN_TLS && !TARGET_64BIT
13984 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13985 && model == TLS_MODEL_LOCAL_DYNAMIC
13986 model = TLS_MODEL_GLOBAL_DYNAMIC;
13990 case TLS_MODEL_GLOBAL_DYNAMIC:
13991 dest = gen_reg_rtx (Pmode);
13995 if (flag_pic && !TARGET_PECOFF)
13996 pic = pic_offset_table_rtx;
13999 pic = gen_reg_rtx (Pmode);
14000 emit_insn (gen_set_got (pic));
14004 if (TARGET_GNU2_TLS)
14007 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
14009 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
14011 tp = get_thread_pointer (Pmode, true);
14012 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
14014 if (GET_MODE (x) != Pmode)
14015 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14017 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
      /* Non-GNU2 TLS: emit an explicit call to __tls_get_addr.  */
14021 rtx caddr = ix86_tls_get_addr ();
14025 rtx rax = gen_rtx_REG (Pmode, AX_REG);
14030 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
14031 insns = get_insns ();
14034 if (GET_MODE (x) != Pmode)
14035 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14037 RTL_CONST_CALL_P (insns) = 1;
14038 emit_libcall_block (insns, dest, rax, x);
14041 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
14045 case TLS_MODEL_LOCAL_DYNAMIC:
14046 base = gen_reg_rtx (Pmode);
14051 pic = pic_offset_table_rtx;
14054 pic = gen_reg_rtx (Pmode);
14055 emit_insn (gen_set_got (pic));
14059 if (TARGET_GNU2_TLS)
14061 rtx tmp = ix86_tls_module_base ();
14064 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
14066 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
14068 tp = get_thread_pointer (Pmode, true);
14069 set_unique_reg_note (get_last_insn (), REG_EQUAL,
14070 gen_rtx_MINUS (Pmode, tmp, tp));
14074 rtx caddr = ix86_tls_get_addr ();
14078 rtx rax = gen_rtx_REG (Pmode, AX_REG);
14084 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
14085 insns = get_insns ();
14088 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
14089 share the LD_BASE result with other LD model accesses. */
14090 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
14091 UNSPEC_TLS_LD_BASE);
14093 RTL_CONST_CALL_P (insns) = 1;
14094 emit_libcall_block (insns, base, rax, eqv);
14097 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
      /* Add the DTPOFF offset of X to the module base.  */
14100 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
14101 off = gen_rtx_CONST (Pmode, off);
14103 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
14105 if (TARGET_GNU2_TLS)
14107 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
14109 if (GET_MODE (x) != Pmode)
14110 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14112 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14116 case TLS_MODEL_INITIAL_EXEC:
14119 if (TARGET_SUN_TLS && !TARGET_X32)
14121 /* The Sun linker took the AMD64 TLS spec literally
14122 and can only handle %rax as destination of the
14123 initial executable code sequence. */
14125 dest = gen_reg_rtx (DImode);
14126 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
14130 /* Generate DImode references to avoid %fs:(%reg32)
14131 problems and linker IE->LE relaxation bug. */
14134 type = UNSPEC_GOTNTPOFF;
14138 set_pic_reg_ever_live ();
14139 pic = pic_offset_table_rtx;
14140 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
14142 else if (!TARGET_ANY_GNU_TLS)
14144 pic = gen_reg_rtx (Pmode);
14145 emit_insn (gen_set_got (pic));
14146 type = UNSPEC_GOTTPOFF;
14151 type = UNSPEC_INDNTPOFF;
      /* Build a GOT-alias-set load of the TP offset.  */
14154 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
14155 off = gen_rtx_CONST (tp_mode, off);
14157 off = gen_rtx_PLUS (tp_mode, pic, off);
14158 off = gen_const_mem (tp_mode, off);
14159 set_mem_alias_set (off, ix86_GOT_alias_set ());
14161 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14163 base = get_thread_pointer (tp_mode,
14164 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14165 off = force_reg (tp_mode, off);
14166 return gen_rtx_PLUS (tp_mode, base, off);
14170 base = get_thread_pointer (Pmode, true);
14171 dest = gen_reg_rtx (Pmode);
14172 emit_insn (ix86_gen_sub3 (dest, base, off));
14176 case TLS_MODEL_LOCAL_EXEC:
14177 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14178 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14179 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
14180 off = gen_rtx_CONST (Pmode, off);
14182 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14184 base = get_thread_pointer (Pmode,
14185 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14186 return gen_rtx_PLUS (Pmode, base, off);
14190 base = get_thread_pointer (Pmode, true);
14191 dest = gen_reg_rtx (Pmode);
14192 emit_insn (ix86_gen_sub3 (dest, base, off));
14197 gcc_unreachable ();
14203 /* Create or return the unique __imp_DECL dllimport symbol corresponding
14204 to symbol DECL if BEIMPORT is true. Otherwise create or return the
14205 unique refptr-DECL symbol corresponding to symbol DECL.
   The hasher keys tree_map entries on the source decl pointer and
   drops entries whose decl was not marked during GC.  */
14207 struct dllimport_hasher : ggc_cache_hasher<tree_map *>
14209 static inline hashval_t hash (tree_map *m) { return m->hash; }
14211 equal (tree_map *a, tree_map *b)
14213 return a->base.from == b->base.from;
14217 handle_cache_entry (tree_map *&m)
14219 extern void gt_ggc_mx (tree_map *&);
14220 if (m == HTAB_EMPTY_ENTRY || m == HTAB_DELETED_ENTRY)
14222 else if (ggc_marked_p (m->base.from))
      /* Decl is dead; evict the cache entry.  */
14225 m = static_cast<tree_map *> (HTAB_DELETED_ENTRY);
14229 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
      /* Look up (or lazily create) the artificial VAR_DECL whose RTL is
         the __imp_/refptr indirection for DECL.  BEIMPORT selects the
         "*__imp_" prefix; otherwise "*refptr." is used.  */
14232 get_dllimport_decl (tree decl, bool beimport)
14234 struct tree_map *h, in;
14236 const char *prefix;
14237 size_t namelen, prefixlen;
14242 if (!dllimport_map)
14243 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
14245 in.hash = htab_hash_pointer (decl);
14246 in.base.from = decl;
14247 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
      /* Cache miss: build the artificial pointer decl.  */
14252 *loc = h = ggc_alloc<tree_map> ();
14254 h->base.from = decl;
14255 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
14256 VAR_DECL, NULL, ptr_type_node);
14257 DECL_ARTIFICIAL (to) = 1;
14258 DECL_IGNORED_P (to) = 1;
14259 DECL_EXTERNAL (to) = 1;
14260 TREE_READONLY (to) = 1;
14262 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
14263 name = targetm.strip_name_encoding (name);
14265 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
14266 ? "*__imp_" : "*__imp__";
14268 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
14269 namelen = strlen (name);
14270 prefixlen = strlen (prefix);
14271 imp_name = (char *) alloca (namelen + prefixlen + 1);
14272 memcpy (imp_name, prefix, prefixlen);
14273 memcpy (imp_name + prefixlen, name, namelen + 1);
14275 name = ggc_alloc_string (imp_name, namelen + prefixlen);
14276 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
14277 SET_SYMBOL_REF_DECL (rtl, to);
14278 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
14281 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
14282 #ifdef SUB_TARGET_RECORD_STUB
14283 SUB_TARGET_RECORD_STUB (name);
      /* The decl's RTL is a GOT-alias-set load through the new symbol.  */
14287 rtl = gen_const_mem (Pmode, rtl);
14288 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
14290 SET_DECL_RTL (to, rtl);
14291 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
14296 /* Expand SYMBOL into its corresponding far-address symbol.
14297 WANT_REG is true if we require the result be a register. */
14300 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
14305 gcc_assert (SYMBOL_REF_DECL (symbol));
14306 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
14308 x = DECL_RTL (imp_decl);
14310 x = force_reg (Pmode, x);
14314 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
14315 true if we require the result be a register. */
14318 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
14323 gcc_assert (SYMBOL_REF_DECL (symbol));
14324 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
14326 x = DECL_RTL (imp_decl);
14328 x = force_reg (Pmode, x);
14332 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
14333 is true if we require the result be a register.  Returns NULL_RTX
   (via fallthrough) when no PE-COFF rewriting applies.  */
14336 legitimize_pe_coff_symbol (rtx addr, bool inreg)
14338 if (!TARGET_PECOFF)
14341 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
      /* Dllimport symbols, bare or with a constant offset.  */
14343 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
14344 return legitimize_dllimport_symbol (addr, inreg);
14345 if (GET_CODE (addr) == CONST
14346 && GET_CODE (XEXP (addr, 0)) == PLUS
14347 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14348 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
14350 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
14351 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14355 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
      /* External decls get a refptr indirection, bare or with a
         constant offset.  */
14357 if (GET_CODE (addr) == SYMBOL_REF
14358 && !is_imported_p (addr)
14359 && SYMBOL_REF_EXTERNAL_P (addr)
14360 && SYMBOL_REF_DECL (addr))
14361 return legitimize_pe_coff_extern_decl (addr, inreg);
14363 if (GET_CODE (addr) == CONST
14364 && GET_CODE (XEXP (addr, 0)) == PLUS
14365 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14366 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
14367 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
14368 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
14370 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
14371 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14376 /* Try machine-dependent ways of modifying an illegitimate address
14377 to be legitimate. If we find one, return the new, valid address.
14378 This macro is used in only one place: `memory_address' in explow.c.
14380 OLDX is the address as it was before break_out_memory_refs was called.
14381 In some cases it is useful to look at this to decide what needs to be done.
14383 It is always safe for this macro to do nothing. It exists to recognize
14384 opportunities to optimize the output.
14386 For the 80386, we handle X+REG by loading X into a register R and
14387 using R+REG. R will go in a general reg and indexing will be used.
14388 However, if REG is a broken-out memory address or multiplication,
14389 nothing needs to be done because REG can certainly go in a general reg.
14391 When -fpic is used, special handling is needed for symbolic references.
14392 See comments by legitimize_pic_address in i386.c for details. */
/* NOTE(review): this excerpt is elided (embedded original line numbers skip);
   comments below describe only the statements that are visible here.  */
14395 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
14397 bool changed = false;
/* TLS symbols are handled first: a bare SYMBOL_REF with a TLS model ...  */
14400 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
14402 return legitimize_tls_address (x, (enum tls_model) log, false);
/* ... and (const (plus (symbol_ref tls) offset)) is rewritten as
   legitimized-TLS-address + offset, preserving the addend.  */
14403 if (GET_CODE (x) == CONST
14404 && GET_CODE (XEXP (x, 0)) == PLUS
14405 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
14406 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
14408 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
14409 (enum tls_model) log, false);
14410 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
/* PE/COFF dllimport symbols get their own legitimization path.  */
14413 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14415 rtx tmp = legitimize_pe_coff_symbol (x, true);
/* PIC symbolic references are delegated to legitimize_pic_address.  */
14420 if (flag_pic && SYMBOLIC_CONST (x))
14421 return legitimize_pic_address (x, 0);
/* Darwin -mdynamic-no-pic handling.  */
14424 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
14425 return machopic_indirect_data_reference (x, 0);
14428 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
14429 if (GET_CODE (x) == ASHIFT
14430 && CONST_INT_P (XEXP (x, 1))
14431 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
14434 log = INTVAL (XEXP (x, 1));
14435 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
14436 GEN_INT (1 << log));
/* For PLUS addresses, canonicalize each small shift operand into a
   MULT so it can serve as a scaled-index term.  */
14439 if (GET_CODE (x) == PLUS)
14441 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14443 if (GET_CODE (XEXP (x, 0)) == ASHIFT
14444 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14445 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
14448 log = INTVAL (XEXP (XEXP (x, 0), 1));
14449 XEXP (x, 0) = gen_rtx_MULT (Pmode,
14450 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
14451 GEN_INT (1 << log));
14454 if (GET_CODE (XEXP (x, 1)) == ASHIFT
14455 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
14456 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
14459 log = INTVAL (XEXP (XEXP (x, 1), 1));
14460 XEXP (x, 1) = gen_rtx_MULT (Pmode,
14461 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
14462 GEN_INT (1 << log));
14465 /* Put multiply first if it isn't already. */
14466 if (GET_CODE (XEXP (x, 1)) == MULT)
14468 std::swap (XEXP (x, 0), XEXP (x, 1));
14472 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
14473 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
14474 created by virtual register instantiation, register elimination, and
14475 similar optimizations. */
14476 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
14479 x = gen_rtx_PLUS (Pmode,
14480 gen_rtx_PLUS (Pmode, XEXP (x, 0),
14481 XEXP (XEXP (x, 1), 0)),
14482 XEXP (XEXP (x, 1), 1));
14486 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
14487 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
14488 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
14489 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14490 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
14491 && CONSTANT_P (XEXP (x, 1)))
14494 rtx other = NULL_RTX;
/* Pick whichever of the two trailing operands is the CONST_INT;
   the remaining one becomes "other" and absorbs the constant.  */
14496 if (CONST_INT_P (XEXP (x, 1)))
14498 constant = XEXP (x, 1);
14499 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
14501 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
14503 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14504 other = XEXP (x, 1);
14512 x = gen_rtx_PLUS (Pmode,
14513 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14514 XEXP (XEXP (XEXP (x, 0), 1), 0)),
14515 plus_constant (Pmode, other,
14516 INTVAL (constant)));
/* If the canonicalizations above produced a legitimate address, stop
   (the early return between these lines is elided in this excerpt).  */
14520 if (changed && ix86_legitimate_address_p (mode, x, false))
/* Otherwise force MULT operands into registers so only base+index
   remains.  */
14523 if (GET_CODE (XEXP (x, 0)) == MULT)
14526 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14529 if (GET_CODE (XEXP (x, 1)) == MULT)
14532 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14536 && REG_P (XEXP (x, 1))
14537 && REG_P (XEXP (x, 0)))
14540 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14543 x = legitimize_pic_address (x, 0);
14546 if (changed && ix86_legitimate_address_p (mode, x, false))
/* Final fallback: force the non-register operand of the PLUS into a
   fresh Pmode pseudo (zero-extending to Pmode where needed).  */
14549 if (REG_P (XEXP (x, 0)))
14551 rtx temp = gen_reg_rtx (Pmode);
14552 rtx val = force_operand (XEXP (x, 1), temp);
14555 val = convert_to_mode (Pmode, val, 1);
14556 emit_move_insn (temp, val);
14559 XEXP (x, 1) = temp;
14563 else if (REG_P (XEXP (x, 1)))
14565 rtx temp = gen_reg_rtx (Pmode);
14566 rtx val = force_operand (XEXP (x, 0), temp);
14569 val = convert_to_mode (Pmode, val, 1);
14570 emit_move_insn (temp, val);
14573 XEXP (x, 0) = temp;
14581 /* Print an integer constant expression in assembler syntax. Addition
14582 and subtraction are the only arithmetic that may appear in these
14583 expressions. FILE is the stdio stream to write to, X is the rtx, and
14584 CODE is the operand print code from the output string. */
/* NOTE(review): elided excerpt -- case labels/breaks between the visible
   lines are missing; comments describe only what is shown.  */
14587 output_pic_addr_const (FILE *file, rtx x, int code)
14591 switch (GET_CODE (x))
14594 gcc_assert (flag_pic);
/* SYMBOL_REF handling: possibly redirect to a Mach-O stub name, emit the
   name, then append @PLT for non-local symbols under code 'P'.  */
14599 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14600 output_addr_const (file, x);
14603 const char *name = XSTR (x, 0);
14605 /* Mark the decl as referenced so that cgraph will
14606 output the function. */
14607 if (SYMBOL_REF_DECL (x))
14608 mark_decl_referenced (SYMBOL_REF_DECL (x));
14611 if (MACHOPIC_INDIRECT
14612 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14613 name = machopic_indirection_name (x, /*stub_p=*/true);
14615 assemble_name (file, name);
14617 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14618 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14619 fputs ("@PLT", file);
/* Internal code labels are printed via ASM_GENERATE_INTERNAL_LABEL.  */
14626 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14627 assemble_name (asm_out_file, buf);
/* CONST_INT: plain decimal.  */
14631 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14635 /* This used to output parentheses around the expression,
14636 but that does not work on the 386 (either ATT or BSD assembler). */
14637 output_pic_addr_const (file, XEXP (x, 0), code);
/* CONST_DOUBLE in VOIDmode is an integer pair (pre-wide-int GCC); wide
   or negative values are printed in hex, small positives in decimal.  */
14641 if (GET_MODE (x) == VOIDmode)
14643 /* We can use %d if the number is <32 bits and positive. */
14644 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14645 fprintf (file, "0x%lx%08lx",
14646 (unsigned long) CONST_DOUBLE_HIGH (x),
14647 (unsigned long) CONST_DOUBLE_LOW (x));
14649 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14652 /* We can't handle floating point constants;
14653 TARGET_PRINT_OPERAND must handle them. */
14654 output_operand_lossage ("floating constant misused");
/* PLUS: recurse on both operands (integer constant printed first).  */
14658 /* Some assemblers need integer constants to appear first. */
14659 if (CONST_INT_P (XEXP (x, 0)))
14661 output_pic_addr_const (file, XEXP (x, 0), code);
14663 output_pic_addr_const (file, XEXP (x, 1), code);
/* MINUS: second operand must be a CONST_INT; the separator between the
   two recursive calls is elided here.  */
14667 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14668 output_pic_addr_const (file, XEXP (x, 1), code);
14670 output_pic_addr_const (file, XEXP (x, 0), code);
/* Bracketed form: '(' ... ')' for Intel dialect, '[' ... ']' for AT&T.  */
14676 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14677 output_pic_addr_const (file, XEXP (x, 0), code);
14679 output_pic_addr_const (file, XEXP (x, 1), code);
14681 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC: print the wrapped operand then the relocation suffix chosen
   by the unspec kind.  */
14685 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14687 bool f = i386_asm_output_addr_const_extra (file, x);
14692 gcc_assert (XVECLEN (x, 0) == 1);
14693 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14694 switch (XINT (x, 1))
14697 fputs ("@GOT", file);
14699 case UNSPEC_GOTOFF:
14700 fputs ("@GOTOFF", file);
14702 case UNSPEC_PLTOFF:
14703 fputs ("@PLTOFF", file);
14706 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14707 "(%rip)" : "[rip]", file);
14709 case UNSPEC_GOTPCREL:
14710 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14711 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14713 case UNSPEC_GOTTPOFF:
14714 /* FIXME: This might be @TPOFF in Sun ld too. */
14715 fputs ("@gottpoff", file);
14718 fputs ("@tpoff", file);
14720 case UNSPEC_NTPOFF:
/* Two spellings: @tpoff vs @ntpoff -- the selecting condition (likely a
   TARGET_64BIT test) is elided between these lines.  */
14722 fputs ("@tpoff", file);
14724 fputs ("@ntpoff", file);
14726 case UNSPEC_DTPOFF:
14727 fputs ("@dtpoff", file);
14729 case UNSPEC_GOTNTPOFF:
14731 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14732 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14734 fputs ("@gotntpoff", file);
14736 case UNSPEC_INDNTPOFF:
14737 fputs ("@indntpoff", file);
14740 case UNSPEC_MACHOPIC_OFFSET:
14742 machopic_output_function_base_name (file);
14746 output_operand_lossage ("invalid UNSPEC as operand");
/* Any other rtx code is an error.  */
14752 output_operand_lossage ("invalid expression as operand");
14756 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14757 We need to emit DTP-relative relocations. */
/* NOTE(review): elided excerpt -- the SIZE switch/case labels between the
   visible lines are missing.  Emits "<directive> <addr>@dtpoff" and, for
   one of the size cases, a trailing ", 0"; unexpected sizes hit
   gcc_unreachable.  */
14759 static void ATTRIBUTE_UNUSED
14760 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14762 fputs (ASM_LONG, file);
14763 output_addr_const (file, x);
14764 fputs ("@dtpoff", file);
/* Pad to the requested size (case labels elided; presumably size 8 on a
   32-bit target -- TODO confirm against upstream i386.c).  */
14770 fputs (", 0", file);
14773 gcc_unreachable ();
14777 /* Return true if X is a representation of the PIC register. This copes
14778 with calls from ix86_find_base_term, where the register might have
14779 been replaced by a cselib value. */
/* NOTE(review): elided excerpt -- return statements between some of the
   visible conditions are missing.  */
14782 ix86_pic_register_p (rtx x)
/* A cselib VALUE matches if it is equal (for cselib) to the PIC reg.  */
14784 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14785 return (pic_offset_table_rtx
14786 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14787 else if (!REG_P (x))
/* With a live pic_offset_table_rtx: match by REGNO, or match a hard reg
   whose ORIGINAL_REGNO is the (pseudo) PIC register.  */
14789 else if (pic_offset_table_rtx)
14791 if (REGNO (x) == REGNO (pic_offset_table_rtx))
14793 if (HARD_REGISTER_P (x)
14794 && !HARD_REGISTER_P (pic_offset_table_rtx)
14795 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
/* Otherwise fall back to the fixed PIC register number.  */
14800 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14803 /* Helper function for ix86_delegitimize_address.
14804 Attempt to delegitimize TLS local-exec accesses. */
/* NOTE(review): elided excerpt -- some early "return orig_x" lines between
   the visible guards are missing.  */
14807 ix86_delegitimize_tls_address (rtx orig_x)
14809 rtx x = orig_x, unspec;
14810 struct ix86_address addr;
/* Only applicable when direct %fs/%gs segment references are in use.  */
14812 if (!TARGET_TLS_DIRECT_SEG_REFS)
14816 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
/* The address must decompose with the TLS segment and a CONST disp.  */
14818 if (ix86_decompose_address (x, &addr) == 0
14819 || addr.seg != DEFAULT_TLS_SEG_REG
14820 || addr.disp == NULL_RTX
14821 || GET_CODE (addr.disp) != CONST)
/* Strip an optional (plus unspec const_int) wrapper, then require the
   UNSPEC_NTPOFF marker of a local-exec access.  */
14823 unspec = XEXP (addr.disp, 0);
14824 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14825 unspec = XEXP (unspec, 0);
14826 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
/* Rebuild symbol [+ offset] [+ scaled index] [+ base].  */
14828 x = XVECEXP (unspec, 0, 0);
14829 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14830 if (unspec != XEXP (addr.disp, 0))
14831 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14834 rtx idx = addr.index;
14835 if (addr.scale != 1)
14836 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14837 x = gen_rtx_PLUS (Pmode, idx, x);
14840 x = gen_rtx_PLUS (Pmode, addr.base, x);
/* If ORIG_X was a MEM, keep its attributes on the new address.  */
14841 if (MEM_P (orig_x))
14842 x = replace_equiv_address_nv (orig_x, x);
14846 /* In the name of slightly smaller debug output, and to cater to
14847 general assembler lossage, recognize PIC+GOTOFF and turn it back
14848 into a direct symbol reference.
14850 On Darwin, this is necessary to avoid a crash, because Darwin
14851 has a different PIC label for each routine but the DWARF debugging
14852 information is not associated with any particular routine, so it's
14853 necessary to remove references to the PIC label from RTL stored by
14854 the DWARF output code. */
/* NOTE(review): elided excerpt -- several returns/else-arms between the
   visible lines are missing; comments describe only what is shown.  */
14857 ix86_delegitimize_address (rtx x)
14859 rtx orig_x = delegitimize_mem_from_attrs (x);
14860 /* addend is NULL or some rtx if x is something+GOTOFF where
14861 something doesn't include the PIC register. */
14862 rtx addend = NULL_RTX;
14863 /* reg_addend is NULL or a multiple of some register. */
14864 rtx reg_addend = NULL_RTX;
14865 /* const_addend is NULL or a const_int. */
14866 rtx const_addend = NULL_RTX;
14867 /* This is the result, or NULL. */
14868 rtx result = NULL_RTX;
/* 64-bit forms first: (const (plus (unspec PCREL) offset)) becomes
   offset + symbol ...  */
14877 if (GET_CODE (x) == CONST
14878 && GET_CODE (XEXP (x, 0)) == PLUS
14879 && GET_MODE (XEXP (x, 0)) == Pmode
14880 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14881 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14882 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14884 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14885 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14886 if (MEM_P (orig_x))
14887 x = replace_equiv_address_nv (orig_x, x);
/* ... and a bare GOTPCREL/PCREL unspec is unwrapped to its symbol,
   adjusting the mode if ORIG_X was a narrower MEM.  */
14891 if (GET_CODE (x) == CONST
14892 && GET_CODE (XEXP (x, 0)) == UNSPEC
14893 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14894 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14895 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14897 x = XVECEXP (XEXP (x, 0), 0, 0);
14898 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14900 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14908 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14909 return ix86_delegitimize_tls_address (orig_x);
14911 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14912 and -mcmodel=medium -fpic. */
/* 32-bit/medium/large-PIC path: expect (plus something (const ...)).  */
14915 if (GET_CODE (x) != PLUS
14916 || GET_CODE (XEXP (x, 1)) != CONST)
14917 return ix86_delegitimize_tls_address (orig_x);
14919 if (ix86_pic_register_p (XEXP (x, 0)))
14920 /* %ebx + GOT/GOTOFF */
14922 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14924 /* %ebx + %reg * scale + GOT/GOTOFF */
14925 reg_addend = XEXP (x, 0);
14926 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14927 reg_addend = XEXP (reg_addend, 1);
14928 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14929 reg_addend = XEXP (reg_addend, 0);
/* Neither operand is the PIC register: treat the whole left operand as
   an addend instead.  */
14932 reg_addend = NULL_RTX;
14933 addend = XEXP (x, 0);
14937 addend = XEXP (x, 0);
/* Peel the CONST wrapper; split off a trailing CONST_INT offset.  */
14939 x = XEXP (XEXP (x, 1), 0);
14940 if (GET_CODE (x) == PLUS
14941 && CONST_INT_P (XEXP (x, 1)))
14943 const_addend = XEXP (x, 1);
/* Accept GOT (for MEMs), GOTOFF (for non-MEMs), or large-model PLTOFF
   unspecs; Darwin local-data unspecs for -m32 Mach-O.  */
14947 if (GET_CODE (x) == UNSPEC
14948 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14949 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14950 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14951 && !MEM_P (orig_x) && !addend)))
14952 result = XVECEXP (x, 0, 0);
14954 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14955 && !MEM_P (orig_x))
14956 result = XVECEXP (x, 0, 0);
14959 return ix86_delegitimize_tls_address (orig_x);
/* Re-attach the pieces that were split off above.  */
14962 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14964 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14967 /* If the rest of original X doesn't involve the PIC register, add
14968 addend and subtract pic_offset_table_rtx. This can happen e.g.
14970 leal (%ebx, %ecx, 4), %ecx
14972 movl foo@GOTOFF(%ecx), %edx
14973 in which case we return (%ecx - %ebx) + foo
14974 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
14975 and reload has completed. */
14976 if (pic_offset_table_rtx
14977 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
14978 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14979 pic_offset_table_rtx),
14981 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
14983 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
14984 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
14985 result = gen_rtx_PLUS (Pmode, tmp, result);
/* Narrow back to ORIG_X's mode when it was a narrower MEM.  */
14990 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14992 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14993 if (result == NULL_RTX)
14999 /* If X is a machine specific address (i.e. a symbol or label being
15000 referenced as a displacement from the GOT implemented using an
15001 UNSPEC), then return the base term. Otherwise return X. */
/* NOTE(review): elided excerpt -- the declaration of `term` and an early
   branch between the visible lines are missing.  */
15004 ix86_find_base_term (rtx x)
15010 if (GET_CODE (x) != CONST)
/* Strip CONST, an optional integral offset, then require a
   GOTPCREL/PCREL unspec; its operand is the base term.  */
15012 term = XEXP (x, 0);
15013 if (GET_CODE (term) == PLUS
15014 && (CONST_INT_P (XEXP (term, 1))
15015 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
15016 term = XEXP (term, 0);
15017 if (GET_CODE (term) != UNSPEC
15018 || (XINT (term, 1) != UNSPEC_GOTPCREL
15019 && XINT (term, 1) != UNSPEC_PCREL))
15022 return XVECEXP (term, 0, 0);
/* Fallback for non-matching addresses.  */
15025 return ix86_delegitimize_address (x);
/* Emit the condition-code suffix for CODE in MODE to FILE; REVERSE flips
   the condition, FP selects the fcmov-style spellings.
   NOTE(review): heavily elided excerpt -- most case labels and suffix
   assignments of the big switch are missing; only representative lines
   are visible.  */
15029 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
15030 bool fp, FILE *file)
15032 const char *suffix;
/* FP compare modes are first mapped to integer condition codes.  */
15034 if (mode == CCFPmode || mode == CCFPUmode)
15036 code = ix86_fp_compare_code_to_integer (code);
15040 code = reverse_condition (code);
15091 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
15095 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
15096 Those same assemblers have the same but opposite lossage on cmov. */
15097 if (mode == CCmode)
15098 suffix = fp ? "nbe" : "a";
15100 gcc_unreachable ();
15116 gcc_unreachable ();
/* Below/carry and not-below/not-carry use CCmode vs CCCmode spellings.  */
15120 if (mode == CCmode)
15122 else if (mode == CCCmode)
15123 suffix = fp ? "b" : "c";
15125 gcc_unreachable ();
15141 gcc_unreachable ();
15145 if (mode == CCmode)
15147 else if (mode == CCCmode)
15148 suffix = fp ? "nb" : "nc";
15150 gcc_unreachable ();
15153 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
15157 if (mode == CCmode)
15160 gcc_unreachable ();
/* Parity flag: (un)ordered FP results.  */
15163 suffix = fp ? "u" : "p";
15166 suffix = fp ? "nu" : "np";
15169 gcc_unreachable ();
15171 fputs (suffix, file);
15174 /* Print the name of register X to FILE based on its machine mode and number.
15175 If CODE is 'w', pretend the mode is HImode.
15176 If CODE is 'b', pretend the mode is QImode.
15177 If CODE is 'k', pretend the mode is SImode.
15178 If CODE is 'q', pretend the mode is DImode.
15179 If CODE is 'x', pretend the mode is V4SFmode.
15180 If CODE is 't', pretend the mode is V8SFmode.
15181 If CODE is 'g', pretend the mode is V16SFmode.
15182 If CODE is 'h', pretend the reg is the 'high' byte register.
15183 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
15184 If CODE is 'd', duplicate the operand for AVX instruction.
/* NOTE(review): elided excerpt -- the size assignments after each code
   test and several case bodies are missing.  */
15188 print_reg (rtx x, int code, FILE *file)
15191 unsigned int regno;
15192 bool duplicated = code == 'd' && TARGET_AVX;
/* AT&T dialect prefixes register names with '%' (the putc is elided).  */
15194 if (ASSEMBLER_DIALECT == ASM_ATT)
/* pc_rtx prints as "rip" and is 64-bit only.  */
15199 gcc_assert (TARGET_64BIT);
15200 fputs ("rip", file);
15204 regno = true_regnum (x);
/* Internal-only registers must never reach the assembler output.  */
15205 gcc_assert (regno != ARG_POINTER_REGNUM
15206 && regno != FRAME_POINTER_REGNUM
15207 && regno != FLAGS_REG
15208 && regno != FPSR_REG
15209 && regno != FPCR_REG);
/* Map the override code to an operand size (assignments elided).  */
15211 if (code == 'w' || MMX_REG_P (x))
15213 else if (code == 'b')
15215 else if (code == 'k')
15217 else if (code == 'q')
15219 else if (code == 'y')
15221 else if (code == 'h')
15223 else if (code == 'x')
15225 else if (code == 't')
15227 else if (code == 'g')
/* No override: derive the size from the operand's own mode.  */
15230 code = GET_MODE_SIZE (GET_MODE (x));
15232 /* Irritatingly, AMD extended registers use different naming convention
15233 from the normal registers: "r%d[bwd]" */
15234 if (REX_INT_REGNO_P (regno))
15236 gcc_assert (TARGET_64BIT);
15238 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
15242 error ("extended registers have no high halves");
15257 error ("unsupported operand size for extended register");
15267 if (STACK_TOP_P (x))
/* General registers of size 4/8 get the 'e'/'r' name prefix.  */
15276 if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x) && ! ANY_BND_REG_P (x))
15277 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
15282 reg = hi_reg_name[regno];
15285 if (regno >= ARRAY_SIZE (qi_reg_name))
15287 reg = qi_reg_name[regno];
15290 if (regno >= ARRAY_SIZE (qi_high_reg_name))
15292 reg = qi_high_reg_name[regno];
/* Vector sizes: strip the leading character of the hi_reg_name entry
   (e.g. to build xmm/ymm/zmm spellings); no 'd' duplication here.  */
15297 gcc_assert (!duplicated);
15299 fputs (hi_reg_name[regno] + 1, file);
15305 gcc_assert (!duplicated);
15307 fputs (hi_reg_name[REGNO (x)] + 1, file);
15312 gcc_unreachable ();
/* 'd' duplication: emit the register a second time, comma-separated.  */
15318 if (ASSEMBLER_DIALECT == ASM_ATT)
15319 fprintf (file, ", %%%s", reg);
15321 fprintf (file, ", %s", reg);
15325 /* Meaning of CODE:
15326 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
15327 C -- print opcode suffix for set/cmov insn.
15328 c -- like C, but print reversed condition
15329 F,f -- likewise, but for floating-point.
15330 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
15332 R -- print embeded rounding and sae.
15333 r -- print only sae.
15334 z -- print the opcode suffix for the size of the current operand.
15335 Z -- likewise, with special suffixes for x87 instructions.
15336 * -- print a star (in certain assembler syntax)
15337 A -- print an absolute memory reference.
15338 E -- print address with DImode register names if TARGET_64BIT.
15339 w -- print the operand as if it's a "word" (HImode) even if it isn't.
15340 s -- print a shift double count, followed by the assemblers argument
15342 b -- print the QImode name of the register for the indicated operand.
15343 %b0 would print %al if operands[0] is reg 0.
15344 w -- likewise, print the HImode name of the register.
15345 k -- likewise, print the SImode name of the register.
15346 q -- likewise, print the DImode name of the register.
15347 x -- likewise, print the V4SFmode name of the register.
15348 t -- likewise, print the V8SFmode name of the register.
15349 g -- likewise, print the V16SFmode name of the register.
15350 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
15351 y -- print "st(0)" instead of "st" as a register.
15352 d -- print duplicated register operand for AVX instruction.
15353 D -- print condition for SSE cmp instruction.
15354 P -- if PIC, print an @PLT suffix.
15355 p -- print raw symbol name.
15356 X -- don't print any sort of PIC '@' suffix for a symbol.
15357 & -- print some in-use local-dynamic symbol name.
15358 H -- print a memory address offset by 8; used for sse high-parts
15359 Y -- print condition for XOP pcom* instruction.
15360 + -- print a branch hint as 'cs' or 'ds' prefix
15361 ; -- print a semicolon (after prefixes due to bug in older gas).
15362 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
15363 @ -- print a segment register of thread base pointer load
15364 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
15365 ! -- print MPX prefix for jxx/call/ret instructions if required.
/* NOTE(review): heavily elided excerpt -- case labels, breaks and many
   statements of this giant operand-code switch are missing; the comments
   below mark only the code groups that remain visible.  */
15369 ix86_print_operand (FILE *file, rtx x, int code)
/* '*' / 'A': absolute reference, dialect dependent.  */
15376 switch (ASSEMBLER_DIALECT)
15383 /* Intel syntax. For absolute addresses, registers should not
15384 be surrounded by braces. */
15388 ix86_print_operand (file, x, 0);
15395 gcc_unreachable ();
15398 ix86_print_operand (file, x, 0);
/* 'E': force address printing via an UNSPEC_LEA_ADDR wrapper.  */
15402 /* Wrap address in an UNSPEC to declare special handling. */
15404 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
15406 output_address (x);
/* 'L'..'T': explicit size-suffix letters, AT&T dialect only (the actual
   putc/fputs of each suffix is elided).  */
15410 if (ASSEMBLER_DIALECT == ASM_ATT)
15415 if (ASSEMBLER_DIALECT == ASM_ATT)
15420 if (ASSEMBLER_DIALECT == ASM_ATT)
15425 if (ASSEMBLER_DIALECT == ASM_ATT)
15430 if (ASSEMBLER_DIALECT == ASM_ATT)
15435 if (ASSEMBLER_DIALECT == ASM_ATT)
/* 'O': Sun-assembler cmov suffix, guarded by configure macro.  */
15440 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15441 if (ASSEMBLER_DIALECT != ASM_ATT)
15444 switch (GET_MODE_SIZE (GET_MODE (x)))
15459 output_operand_lossage
15460 ("invalid operand size for operand code 'O'");
/* 'z': generic size suffix from the operand mode.  */
15469 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15471 /* Opcodes don't get size suffixes if using Intel opcodes. */
15472 if (ASSEMBLER_DIALECT == ASM_INTEL)
15475 switch (GET_MODE_SIZE (GET_MODE (x)))
15494 output_operand_lossage
15495 ("invalid operand size for operand code 'z'");
15500 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15502 (0, "non-integer operand used with operand code 'z'");
/* 'Z': x87-flavoured size suffixes (fild/fist family).  */
15506 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
15507 if (ASSEMBLER_DIALECT == ASM_INTEL)
15510 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15512 switch (GET_MODE_SIZE (GET_MODE (x)))
15515 #ifdef HAVE_AS_IX86_FILDS
15525 #ifdef HAVE_AS_IX86_FILDQ
15528 fputs ("ll", file);
15536 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15538 /* 387 opcodes don't get size suffixes
15539 if the operands are registers. */
15540 if (STACK_REG_P (x))
15543 switch (GET_MODE_SIZE (GET_MODE (x)))
15564 output_operand_lossage
15565 ("invalid operand type used with operand code 'Z'");
15569 output_operand_lossage
15570 ("invalid operand size for operand code 'Z'");
/* 's': shift-double count followed by ", ".  */
15589 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15591 ix86_print_operand (file, x, 0);
15592 fputs (", ", file);
/* 'Y': XOP pcom* condition names.  */
15597 switch (GET_CODE (x))
15600 fputs ("neq", file);
15603 fputs ("eq", file);
15607 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15611 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15615 fputs ("le", file);
15619 fputs ("lt", file);
15622 fputs ("unord", file);
15625 fputs ("ord", file);
15628 fputs ("ueq", file);
15631 fputs ("nlt", file);
15634 fputs ("nle", file);
15637 fputs ("ule", file);
15640 fputs ("ult", file);
15643 fputs ("une", file);
15646 output_operand_lossage ("operand is not a condition code, "
15647 "invalid operand code 'Y'");
/* 'D': SSE cmp condition names.  */
15653 /* Little bit of braindamage here. The SSE compare instructions
15654 does use completely different names for the comparisons that the
15655 fp conditional moves. */
15656 switch (GET_CODE (x))
15661 fputs ("eq_us", file);
15665 fputs ("eq", file);
15670 fputs ("nge", file);
15674 fputs ("lt", file);
15679 fputs ("ngt", file);
15683 fputs ("le", file);
15686 fputs ("unord", file);
15691 fputs ("neq_oq", file);
15695 fputs ("neq", file);
15700 fputs ("ge", file);
15704 fputs ("nlt", file);
15709 fputs ("gt", file);
15713 fputs ("nle", file);
15716 fputs ("ord", file);
15719 output_operand_lossage ("operand is not a condition code, "
15720 "invalid operand code 'D'");
/* 'C'/'c'/'F'/'f': set/cmov condition suffix via put_condition_code.  */
15727 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15728 if (ASSEMBLER_DIALECT == ASM_ATT)
15734 if (!COMPARISON_P (x))
15736 output_operand_lossage ("operand is not a condition code, "
15737 "invalid operand code '%c'", code);
15740 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15741 code == 'c' || code == 'f',
15742 code == 'F' || code == 'f',
/* 'H': address offset by 8 (SSE high parts).  */
15747 if (!offsettable_memref_p (x))
15749 output_operand_lossage ("operand is not an offsettable memory "
15750 "reference, invalid operand code 'H'");
15753 /* It doesn't actually matter what mode we use here, as we're
15754 only going to use this for printing. */
15755 x = adjust_address_nv (x, DImode, 8);
15756 /* Output 'qword ptr' for intel assembler dialect. */
15757 if (ASSEMBLER_DIALECT == ASM_INTEL)
/* HLE xacquire/xrelease prefixes (raw bytes without assembler support).  */
15762 gcc_assert (CONST_INT_P (x));
15764 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15765 #ifdef HAVE_AS_IX86_HLE
15766 fputs ("xacquire ", file);
15768 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15770 else if (INTVAL (x) & IX86_HLE_RELEASE)
15771 #ifdef HAVE_AS_IX86_HLE
15772 fputs ("xrelease ", file);
15774 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15776 /* We do not want to print value of the operand. */
/* AVX-512 zero-masking marker.  */
15780 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15781 fputs ("{z}", file);
/* 'r': {sae} marker, dialect-dependent comma placement.  */
15785 gcc_assert (CONST_INT_P (x));
15786 gcc_assert (INTVAL (x) == ROUND_SAE);
15788 if (ASSEMBLER_DIALECT == ASM_INTEL)
15789 fputs (", ", file);
15791 fputs ("{sae}", file);
15793 if (ASSEMBLER_DIALECT == ASM_ATT)
15794 fputs (", ", file);
/* 'R': embedded-rounding {rn,rd,ru,rz}-sae markers.  */
15799 gcc_assert (CONST_INT_P (x));
15801 if (ASSEMBLER_DIALECT == ASM_INTEL)
15802 fputs (", ", file);
15804 switch (INTVAL (x))
15806 case ROUND_NEAREST_INT | ROUND_SAE:
15807 fputs ("{rn-sae}", file);
15809 case ROUND_NEG_INF | ROUND_SAE:
15810 fputs ("{rd-sae}", file);
15812 case ROUND_POS_INF | ROUND_SAE:
15813 fputs ("{ru-sae}", file);
15815 case ROUND_ZERO | ROUND_SAE:
15816 fputs ("{rz-sae}", file);
15819 gcc_unreachable ();
15822 if (ASSEMBLER_DIALECT == ASM_ATT)
15823 fputs (", ", file);
/* '&': some local-dynamic TLS symbol name.  */
15828 if (ASSEMBLER_DIALECT == ASM_ATT)
15834 const char *name = get_some_local_dynamic_name ();
15836 output_operand_lossage ("'%%&' used without any "
15837 "local dynamic TLS references");
15839 assemble_name (file, name);
/* '+': branch hint prefixes from REG_BR_PROB notes.  */
15848 || optimize_function_for_size_p (cfun)
15849 || !TARGET_BRANCH_PREDICTION_HINTS)
15852 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15855 int pred_val = XINT (x, 0);
15857 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15858 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15860 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15862 = final_forward_branch_p (current_output_insn) == 0;
15864 /* Emit hints only in the case default branch prediction
15865 heuristics would fail. */
15866 if (taken != cputaken)
15868 /* We use 3e (DS) prefix for taken branches and
15869 2e (CS) prefix for not taken branches. */
15871 fputs ("ds ; ", file);
15873 fputs ("cs ; ", file);
/* ';': separator for old gas (configure-dependent).  */
15881 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
/* '@': TLS base segment register, %fs for 64-bit userland else %gs.  */
15887 if (ASSEMBLER_DIALECT == ASM_ATT)
15890 /* The kernel uses a different segment register for performance
15891 reasons; a system call would not have to trash the userspace
15892 segment register, which would be expensive. */
15893 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15894 fputs ("fs", file);
15896 fputs ("gs", file);
/* '~': 'i' (integer) with AVX2, else 'f' (float).  */
15900 putc (TARGET_AVX2 ? 'i' : 'f', file);
/* '^': addr32 prefix for x32.  */
15904 if (TARGET_64BIT && Pmode != word_mode)
15905 fputs ("addr32 ", file);
/* '!': MPX bnd prefix.  */
15909 if (ix86_bnd_prefixed_insn_p (current_output_insn))
15910 fputs ("bnd ", file);
15914 output_operand_lossage ("invalid operand code '%c'", code);
/* Non-punct operands: registers, memory, constants.  */
15919 print_reg (x, code, file);
15921 else if (MEM_P (x))
15923 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15924 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15925 && GET_MODE (x) != BLKmode)
15928 switch (GET_MODE_SIZE (GET_MODE (x)))
15930 case 1: size = "BYTE"; break;
15931 case 2: size = "WORD"; break;
15932 case 4: size = "DWORD"; break;
15933 case 8: size = "QWORD"; break;
15934 case 12: size = "TBYTE"; break;
15936 if (GET_MODE (x) == XFmode)
15941 case 32: size = "YMMWORD"; break;
15942 case 64: size = "ZMMWORD"; break;
15944 gcc_unreachable ();
15947 /* Check for explicit size override (codes 'b', 'w', 'k',
15951 else if (code == 'w')
15953 else if (code == 'k')
15955 else if (code == 'q')
15957 else if (code == 'x')
15960 fputs (size, file);
15961 fputs (" PTR ", file);
15965 /* Avoid (%rip) for call operands. */
15966 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15967 && !CONST_INT_P (x))
15968 output_addr_const (file, x);
15969 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15970 output_operand_lossage ("invalid constraints for operand")
15972 output_address (x);
/* SFmode immediates print as a 32-bit hex pattern (sign-extended to 8
   bytes for AT&T -- condition between these lines is elided).  */
15975 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15980 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15981 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15983 if (ASSEMBLER_DIALECT == ASM_ATT)
15985 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15987 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15988 (unsigned long long) (int) l);
15990 fprintf (file, "0x%08x", (unsigned int) l);
/* DFmode immediates print as a 64-bit hex pattern.  */
15993 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15998 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15999 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16001 if (ASSEMBLER_DIALECT == ASM_ATT)
16003 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
16006 /* These float cases don't actually occur as immediate operands. */
16007 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
16011 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
16012 fputs (dstr, file);
16017 /* We have patterns that allow zero sets of memory, for instance.
16018 In 64-bit mode, we should probably support all 8-byte vectors,
16019 since we can in fact encode that into an immediate. */
16020 if (GET_CODE (x) == CONST_VECTOR)
16022 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
/* Immediate-marker prefixes ('$' / "OFFSET FLAT:") unless 'P'/'p'.  */
16026 if (code != 'P' && code != 'p')
16028 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
16030 if (ASSEMBLER_DIALECT == ASM_ATT)
16033 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
16034 || GET_CODE (x) == LABEL_REF)
16036 if (ASSEMBLER_DIALECT == ASM_ATT)
16039 fputs ("OFFSET FLAT:", file);
16042 if (CONST_INT_P (x))
16043 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16044 else if (flag_pic || MACHOPIC_INDIRECT)
16045 output_pic_addr_const (file, x, code);
16047 output_addr_const (file, x);
/* TARGET_PRINT_OPERAND_PUNCT_VALID_P hook: accept exactly the punctuation
   operand codes ix86_print_operand implements ('@','*','+','&',';','~',
   '^','!').  (Surrounding braces/return type are elided in this excerpt.)  */
16052 ix86_print_operand_punct_valid_p (unsigned char code)
16054 return (code == '@' || code == '*' || code == '+' || code == '&'
16055 || code == ';' || code == '~' || code == '^' || code == '!');
16058 /* Print a memory operand whose address is ADDR. */
/* NOTE(review): this extract is non-contiguous (embedded source line
   numbers jump), so braces, else-arms and some statements between the
   visible lines are missing; read the original file before editing.  */
16061 ix86_print_operand_address (FILE *file, rtx addr)
16063 struct ix86_address parts;
16064 rtx base, index, disp;
/* VSIB (gather/scatter) addresses wrap the real address in an UNSPEC;
   the vector index and scale come from the UNSPEC operands 1 and 2.  */
16070 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
16072 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16073 gcc_assert (parts.index == NULL_RTX);
16074 parts.index = XVECEXP (addr, 0, 1);
16075 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
16076 addr = XVECEXP (addr, 0, 0);
16079 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
16081 gcc_assert (TARGET_64BIT);
16082 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
/* MPX bound-make addresses: base/index are swapped in from the UNSPEC.  */
16085 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
16087 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
16088 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
16089 if (parts.base != NULL_RTX)
16091 parts.index = parts.base;
16094 parts.base = XVECEXP (addr, 0, 0);
16095 addr = XVECEXP (addr, 0, 0);
16097 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
16099 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16100 gcc_assert (parts.index == NULL_RTX);
16101 parts.index = XVECEXP (addr, 0, 1);
16102 addr = XVECEXP (addr, 0, 0);
16105 ok = ix86_decompose_address (addr, &parts);
16110 index = parts.index;
16112 scale = parts.scale;
/* Segment override prefix (AT&T prints "fs:"/"gs:" before the address).  */
16120 if (ASSEMBLER_DIALECT == ASM_ATT)
16122 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
16125 gcc_unreachable ();
16128 /* Use one byte shorter RIP relative addressing for 64bit mode. */
16129 if (TARGET_64BIT && !base && !index)
16133 if (GET_CODE (disp) == CONST
16134 && GET_CODE (XEXP (disp, 0)) == PLUS
16135 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16136 symbol = XEXP (XEXP (disp, 0), 0);
/* Only non-TLS symbols/labels are eligible for %rip addressing.  */
16138 if (GET_CODE (symbol) == LABEL_REF
16139 || (GET_CODE (symbol) == SYMBOL_REF
16140 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
16143 if (!base && !index)
16145 /* Displacement only requires special attention. */
16147 if (CONST_INT_P (disp))
16149 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
16150 fputs ("ds:", file);
16151 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
16154 output_pic_addr_const (file, disp, 0);
16156 output_addr_const (file, disp);
16160 /* Print SImode register names to force addr32 prefix. */
16161 if (SImode_address_operand (addr, VOIDmode))
16163 #ifdef ENABLE_CHECKING
16164 gcc_assert (TARGET_64BIT);
16165 switch (GET_CODE (addr))
16168 gcc_assert (GET_MODE (addr) == SImode);
16169 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
16173 gcc_assert (GET_MODE (addr) == DImode);
16176 gcc_unreachable ();
16179 gcc_assert (!code);
16185 && CONST_INT_P (disp)
16186 && INTVAL (disp) < -16*1024*1024)
16188 /* X32 runs in 64-bit mode, where displacement, DISP, in
16189 address DISP(%r64), is encoded as 32-bit immediate sign-
16190 extended from 32-bit to 64-bit. For -0x40000300(%r64),
16191 address is %r64 + 0xffffffffbffffd00. When %r64 <
16192 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
16193 which is invalid for x32. The correct address is %r64
16194 - 0x40000300 == 0xf7ffdd64. To properly encode
16195 -0x40000300(%r64) for x32, we zero-extend negative
16196 displacement by forcing addr32 prefix which truncates
16197 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
16198 zero-extend all negative displacements, including -1(%rsp).
16199 However, for small negative displacements, sign-extension
16200 won't cause overflow. We only zero-extend negative
16201 displacements if they < -16*1024*1024, which is also used
16202 to check legitimate address displacements for PIC. */
/* AT&T syntax: disp(base,index,scale).  */
16206 if (ASSEMBLER_DIALECT == ASM_ATT)
16211 output_pic_addr_const (file, disp, 0);
16212 else if (GET_CODE (disp) == LABEL_REF)
16213 output_asm_label (disp);
16215 output_addr_const (file, disp);
16220 print_reg (base, code, file);
16224 print_reg (index, vsib ? 0 : code, file);
16225 if (scale != 1 || vsib)
16226 fprintf (file, ",%d", scale);
/* Intel syntax: [base+index*scale+disp]; split a CONST symbol+offset
   so the offset can be printed with an explicit sign.  */
16232 rtx offset = NULL_RTX;
16236 /* Pull out the offset of a symbol; print any symbol itself. */
16237 if (GET_CODE (disp) == CONST
16238 && GET_CODE (XEXP (disp, 0)) == PLUS
16239 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16241 offset = XEXP (XEXP (disp, 0), 1);
16242 disp = gen_rtx_CONST (VOIDmode,
16243 XEXP (XEXP (disp, 0), 0));
16247 output_pic_addr_const (file, disp, 0);
16248 else if (GET_CODE (disp) == LABEL_REF)
16249 output_asm_label (disp);
16250 else if (CONST_INT_P (disp))
16253 output_addr_const (file, disp);
16259 print_reg (base, code, file);
16262 if (INTVAL (offset) >= 0)
16264 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16268 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16275 print_reg (index, vsib ? 0 : code, file);
16276 if (scale != 1 || vsib)
16277 fprintf (file, "*%d", scale);
16284 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
/* Prints UNSPEC-wrapped constants: the inner operand followed by the
   matching TLS relocation suffix (@gottpoff, @tpoff, @ntpoff, @dtpoff,
   @gotntpoff, @indntpoff), plus Mach-O and split-stack special cases.
   NOTE(review): non-contiguous extract — returns, case breaks, and some
   labels are not visible here.  */
16287 i386_asm_output_addr_const_extra (FILE *file, rtx x)
16291 if (GET_CODE (x) != UNSPEC)
16294 op = XVECEXP (x, 0, 0);
16295 switch (XINT (x, 1))
16297 case UNSPEC_GOTTPOFF:
16298 output_addr_const (file, op);
16299 /* FIXME: This might be @TPOFF in Sun ld. */
16300 fputs ("@gottpoff", file);
16303 output_addr_const (file, op);
16304 fputs ("@tpoff", file);
16306 case UNSPEC_NTPOFF:
16307 output_addr_const (file, op);
16309 fputs ("@tpoff", file);
16311 fputs ("@ntpoff", file);
16313 case UNSPEC_DTPOFF:
16314 output_addr_const (file, op);
16315 fputs ("@dtpoff", file);
16317 case UNSPEC_GOTNTPOFF:
16318 output_addr_const (file, op);
16320 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16321 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
16323 fputs ("@gotntpoff", file);
16325 case UNSPEC_INDNTPOFF:
16326 output_addr_const (file, op);
16327 fputs ("@indntpoff", file);
16330 case UNSPEC_MACHOPIC_OFFSET:
16331 output_addr_const (file, op);
16333 machopic_output_function_base_name (file);
16337 case UNSPEC_STACK_CHECK:
16341 gcc_assert (flag_split_stack);
16343 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
16344 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
16346 gcc_unreachable ();
/* Split-stack guard lives at a fixed offset in the TLS segment
   (%fs on 64-bit, %gs on 32-bit).  */
16349 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
16360 /* Split one or more double-mode RTL references into pairs of half-mode
16361 references. The RTL can be REG, offsettable MEM, integer constant, or
16362 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
16363 split and "num" is its length. lo_half and hi_half are output arrays
16364 that parallel "operands". */
/* NOTE(review): non-contiguous extract — the switch on MODE selecting
   DImode vs SImode halves and the surrounding loop are partially
   missing.  */
16367 split_double_mode (machine_mode mode, rtx operands[],
16368 int num, rtx lo_half[], rtx hi_half[])
16370 machine_mode half_mode;
16376 half_mode = DImode;
16379 half_mode = SImode;
16382 gcc_unreachable ();
/* Byte offset of the high half within the double-mode value.  */
16385 byte = GET_MODE_SIZE (half_mode);
16389 rtx op = operands[num];
16391 /* simplify_subreg refuse to split volatile memory addresses,
16392 but we still have to handle it. */
16395 lo_half[num] = adjust_address (op, half_mode, 0);
16396 hi_half[num] = adjust_address (op, half_mode, byte);
16400 lo_half[num] = simplify_gen_subreg (half_mode, op,
16401 GET_MODE (op) == VOIDmode
16402 ? mode : GET_MODE (op), 0);
16403 hi_half[num] = simplify_gen_subreg (half_mode, op,
16404 GET_MODE (op) == VOIDmode
16405 ? mode : GET_MODE (op), byte);
16410 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
16411 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
16412 is the expression of the binary operation. The output may either be
16413 emitted here, or returned to the caller, like all output_* functions.
16415 There is no guarantee that the operands are the same mode, as they
16416 might be within FLOAT or FLOAT_EXTEND expressions. */
16418 #ifndef SYSV386_COMPAT
16419 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
16420 wants to fix the assemblers because that causes incompatibility
16421 with gcc. No-one wants to fix gcc because that causes
16422 incompatibility with assemblers... You can use the option of
16423 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
16424 #define SYSV386_COMPAT 1
/* NOTE(review): non-contiguous extract — the opcode-name selection
   (fadd/fsub/fmul/fdiv and their SSE mnemonics) between the visible
   lines is missing; confirm against the original before editing.  */
16428 output_387_binary_op (rtx insn, rtx *operands)
16430 static char buf[40];
16433 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
16435 #ifdef ENABLE_CHECKING
16436 /* Even if we do not want to check the inputs, this documents input
16437 constraints. Which helps in understanding the following code. */
16438 if (STACK_REG_P (operands[0])
16439 && ((REG_P (operands[1])
16440 && REGNO (operands[0]) == REGNO (operands[1])
16441 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
16442 || (REG_P (operands[2])
16443 && REGNO (operands[0]) == REGNO (operands[2])
16444 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
16445 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
16448 gcc_assert (is_sse);
/* Choose the mnemonic stem; integer-mode operands select the fi*
   (integer) forms.  */
16451 switch (GET_CODE (operands[3]))
16454 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16455 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16463 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16464 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16472 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16473 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16481 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16482 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16490 gcc_unreachable ();
/* SSE path: emit the scalar ss/sd form (3-operand when AVX-style,
   2-operand otherwise, as the two branches below show).  */
16497 strcpy (buf, ssep);
16498 if (GET_MODE (operands[0]) == SFmode)
16499 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
16501 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
16505 strcpy (buf, ssep + 1);
16506 if (GET_MODE (operands[0]) == SFmode)
16507 strcat (buf, "ss\t{%2, %0|%0, %2}");
16509 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* x87 path: pick the p/rp suffix and operand order depending on which
   stack register is the destination and which operands die.  */
16515 switch (GET_CODE (operands[3]))
16519 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
16520 std::swap (operands[1], operands[2]);
16522 /* know operands[0] == operands[1]. */
16524 if (MEM_P (operands[2]))
16530 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16532 if (STACK_TOP_P (operands[0]))
16533 /* How is it that we are storing to a dead operand[2]?
16534 Well, presumably operands[1] is dead too. We can't
16535 store the result to st(0) as st(0) gets popped on this
16536 instruction. Instead store to operands[2] (which I
16537 think has to be st(1)). st(1) will be popped later.
16538 gcc <= 2.8.1 didn't have this check and generated
16539 assembly code that the Unixware assembler rejected. */
16540 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16542 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16546 if (STACK_TOP_P (operands[0]))
16547 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16549 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16554 if (MEM_P (operands[1]))
16560 if (MEM_P (operands[2]))
16566 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16569 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16570 derived assemblers, confusingly reverse the direction of
16571 the operation for fsub{r} and fdiv{r} when the
16572 destination register is not st(0). The Intel assembler
16573 doesn't have this brain damage. Read !SYSV386_COMPAT to
16574 figure out what the hardware really does. */
16575 if (STACK_TOP_P (operands[0]))
16576 p = "{p\t%0, %2|rp\t%2, %0}";
16578 p = "{rp\t%2, %0|p\t%0, %2}";
16580 if (STACK_TOP_P (operands[0]))
16581 /* As above for fmul/fadd, we can't store to st(0). */
16582 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16584 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16589 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16592 if (STACK_TOP_P (operands[0]))
16593 p = "{rp\t%0, %1|p\t%1, %0}";
16595 p = "{p\t%1, %0|rp\t%0, %1}";
16597 if (STACK_TOP_P (operands[0]))
16598 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16600 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16605 if (STACK_TOP_P (operands[0]))
16607 if (STACK_TOP_P (operands[1]))
16608 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16610 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16613 else if (STACK_TOP_P (operands[1]))
16616 p = "{\t%1, %0|r\t%0, %1}";
16618 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16624 p = "{r\t%2, %0|\t%0, %2}";
16626 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16632 gcc_unreachable ();
16639 /* Check if a 256bit AVX register is referenced inside of EXP. */
/* Looks through one SUBREG layer, then tests the register's mode with
   VALID_AVX256_REG_OR_OI_MODE.  */
16642 ix86_check_avx256_register (const_rtx exp)
16644 if (GET_CODE (exp) == SUBREG)
16645 exp = SUBREG_REG (exp);
16647 return (REG_P (exp)
16648 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
16651 /* Return needed mode for entity in optimize_mode_switching pass. */
/* AVX upper-128 state needed before INSN: DIRTY when a 256-bit AVX
   register is referenced (in call arguments or in the pattern),
   CLEAN after a call with no such arguments, ANY otherwise.  */
16654 ix86_avx_u128_mode_needed (rtx_insn *insn)
16660 /* Needed mode is set to AVX_U128_CLEAN if there are
16661 no 256bit modes used in function arguments. */
16662 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16664 link = XEXP (link, 1))
16666 if (GET_CODE (XEXP (link, 0)) == USE)
16668 rtx arg = XEXP (XEXP (link, 0), 0);
16670 if (ix86_check_avx256_register (arg))
16671 return AVX_U128_DIRTY;
16675 return AVX_U128_CLEAN;
16678 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16679 changes state only when a 256bit register is written to, but we need
16680 to prevent the compiler from moving optimal insertion point above
16681 eventual read from 256bit register. */
16682 subrtx_iterator::array_type array;
16683 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
16684 if (ix86_check_avx256_register (*iter))
16685 return AVX_U128_DIRTY;
16687 return AVX_U128_ANY;
16690 /* Return mode that i387 must be switched into
16691 prior to the execution of insn. */
/* NOTE(review): non-contiguous extract — the per-mode availability
   checks between the visible `if (mode == ...)` lines are missing.  */
16694 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16696 enum attr_i387_cw mode;
16698 /* The mode UNINITIALIZED is used to store control word after a
16699 function call or ASM pattern. The mode ANY specify that function
16700 has no requirements on the control word and make no changes in the
16701 bits we are interested in. */
16704 || (NONJUMP_INSN_P (insn)
16705 && (asm_noperands (PATTERN (insn)) >= 0
16706 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16707 return I387_CW_UNINITIALIZED;
16709 if (recog_memoized (insn) < 0)
16710 return I387_CW_ANY;
/* Control-word mode comes from the insn's i387_cw attribute.  */
16712 mode = get_attr_i387_cw (insn);
16717 if (mode == I387_CW_TRUNC)
16722 if (mode == I387_CW_FLOOR)
16727 if (mode == I387_CW_CEIL)
16732 if (mode == I387_CW_MASK_PM)
16737 gcc_unreachable ();
16740 return I387_CW_ANY;
16743 /* Return mode that entity must be switched into
16744 prior to the execution of insn. */
/* Dispatches to the AVX-U128 or i387 control-word helper depending on
   ENTITY (the selecting switch is not visible in this extract).  */
16747 ix86_mode_needed (int entity, rtx_insn *insn)
16752 return ix86_avx_u128_mode_needed (insn);
16757 return ix86_i387_mode_needed (entity, insn);
16759 gcc_unreachable ();
16764 /* Check if a 256bit AVX register is referenced in stores. */
/* note_stores callback: sets the bool pointed to by DATA when DEST is
   a 256-bit AVX register.  */
16767 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16769 if (ix86_check_avx256_register (dest))
16771 bool *used = (bool *) data;
16776 /* Calculate mode of upper 128bit AVX registers after the insn. */
16779 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16781 rtx pat = PATTERN (insn);
/* vzeroupper/vzeroall leave the upper halves clean by definition.  */
16783 if (vzeroupper_operation (pat, VOIDmode)
16784 || vzeroall_operation (pat, VOIDmode))
16785 return AVX_U128_CLEAN;
16787 /* We know that state is clean after CALL insn if there are no
16788 256bit registers used in the function return register. */
16791 bool avx_reg256_found = false;
16792 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16794 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16797 /* Otherwise, return current mode. Remember that if insn
16798 references AVX 256bit registers, the mode was already changed
16799 to DIRTY from MODE_NEEDED. */
16803 /* Return the mode that an insn results in. */
/* Entity dispatcher (switch not visible in this extract); AVX-U128 is
   the only entity with a non-trivial after-mode computation here.  */
16806 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16811 return ix86_avx_u128_mode_after (mode, insn);
16818 gcc_unreachable ();
/* AVX upper-128 state assumed at function entry: DIRTY when any
   incoming argument lives in a 256-bit AVX register, else CLEAN.  */
16823 ix86_avx_u128_mode_entry (void)
16827 /* Entry mode is set to AVX_U128_DIRTY if there are
16828 256bit modes used in function arguments. */
16829 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16830 arg = TREE_CHAIN (arg))
16832 rtx incoming = DECL_INCOMING_RTL (arg);
16834 if (incoming && ix86_check_avx256_register (incoming))
16835 return AVX_U128_DIRTY;
16838 return AVX_U128_CLEAN;
16841 /* Return a mode that ENTITY is assumed to be
16842 switched to at function entry. */
/* Entity dispatcher; i387 control-word entities report I387_CW_ANY.  */
16845 ix86_mode_entry (int entity)
16850 return ix86_avx_u128_mode_entry ();
16855 return I387_CW_ANY;
16857 gcc_unreachable ();
/* AVX upper-128 state assumed at function exit: DIRTY when the return
   value is in a 256-bit AVX register, else CLEAN.  */
16862 ix86_avx_u128_mode_exit (void)
16864 rtx reg = crtl->return_rtx;
16866 /* Exit mode is set to AVX_U128_DIRTY if there are
16867 256bit modes used in the function return register. */
16868 if (reg && ix86_check_avx256_register (reg))
16869 return AVX_U128_DIRTY;
16871 return AVX_U128_CLEAN;
16874 /* Return a mode that ENTITY is assumed to be
16875 switched to at function exit. */
/* Entity dispatcher; i387 control-word entities report I387_CW_ANY.  */
16878 ix86_mode_exit (int entity)
16883 return ix86_avx_u128_mode_exit ();
16888 return I387_CW_ANY;
16890 gcc_unreachable ();
16895 ix86_mode_priority (int, int n)
16900 /* Output code to initialize control word copies used by trunc?f?i and
16901 rounding patterns. CURRENT_MODE is set to current control word,
16902 while NEW_MODE is set to new control word. */
/* Saves the current x87 control word (fnstcw), derives a modified copy
   for the requested rounding/masking MODE, and stores it into the
   matching stack slot for later fldcw.  Two strategies are visible:
   16-bit or/and arithmetic (for 64-bit, partial-reg-stall, or -Os
   targets) versus movsi_insv_1 bit insertion.  */
16905 emit_i387_cw_initialization (int mode)
16907 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16910 enum ix86_stack_slot slot;
16912 rtx reg = gen_reg_rtx (HImode);
16914 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16915 emit_move_insn (reg, copy_rtx (stored_mode));
16917 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16918 || optimize_insn_for_size_p ())
16922 case I387_CW_TRUNC:
16923 /* round toward zero (truncate) */
16924 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16925 slot = SLOT_CW_TRUNC;
16928 case I387_CW_FLOOR:
16929 /* round down toward -oo */
16930 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16931 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16932 slot = SLOT_CW_FLOOR;
16936 /* round up toward +oo */
16937 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16938 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16939 slot = SLOT_CW_CEIL;
16942 case I387_CW_MASK_PM:
16943 /* mask precision exception for nearbyint() */
16944 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16945 slot = SLOT_CW_MASK_PM;
16949 gcc_unreachable ();
/* Alternate strategy: insert the 2-bit rounding-control field
   directly (bits 10-11 of the control word).  */
16956 case I387_CW_TRUNC:
16957 /* round toward zero (truncate) */
16958 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16959 slot = SLOT_CW_TRUNC;
16962 case I387_CW_FLOOR:
16963 /* round down toward -oo */
16964 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16965 slot = SLOT_CW_FLOOR;
16969 /* round up toward +oo */
16970 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16971 slot = SLOT_CW_CEIL;
16974 case I387_CW_MASK_PM:
16975 /* mask precision exception for nearbyint() */
16976 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16977 slot = SLOT_CW_MASK_PM;
16981 gcc_unreachable ();
16985 gcc_assert (slot < MAX_386_STACK_LOCALS);
16987 new_mode = assign_386_stack_local (HImode, slot);
16988 emit_move_insn (new_mode, reg);
16991 /* Emit vzeroupper. */
/* Suppresses the vzeroupper when any call-saved SSE register is live
   at the insertion point (vzeroupper would clobber its upper half).  */
16994 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16998 /* Cancel automatic vzeroupper insertion if there are
16999 live call-saved SSE registers at the insertion point. */
17001 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17002 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
17006 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17007 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
17010 emit_insn (gen_avx_vzeroupper ());
17013 /* Generate one or more insns to set ENTITY to MODE. */
17015 /* Generate one or more insns to set ENTITY to MODE. HARD_REG_LIVE
17016 is the set of hard registers live at the point where the insn(s)
17017 are to be inserted. */
17020 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
17021 HARD_REG_SET regs_live)
/* AVX entity: going CLEAN emits a (possibly suppressed) vzeroupper.  */
17026 if (mode == AVX_U128_CLEAN)
17027 ix86_avx_emit_vzeroupper (regs_live);
/* i387 entities: materialize the requested control word.  */
17033 if (mode != I387_CW_ANY
17034 && mode != I387_CW_UNINITIALIZED)
17035 emit_i387_cw_initialization (mode);
17038 gcc_unreachable ();
17042 /* Output code for INSN to convert a float to a signed int. OPERANDS
17043 are the insn operands. The output may be [HSD]Imode and the input
17044 operand may be [SDX]Fmode. */
17047 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
17049 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17050 int dimode_p = GET_MODE (operands[0]) == DImode;
17051 int round_mode = get_attr_i387_cw (insn);
17053 /* Jump through a hoop or two for DImode, since the hardware has no
17054 non-popping instruction. We used to do this a different way, but
17055 that was somewhat fragile and broke with post-reload splitters. */
17056 if ((dimode_p || fisttp) && !stack_top_dies)
17057 output_asm_insn ("fld\t%y1", operands);
17059 gcc_assert (STACK_TOP_P (operands[1]));
17060 gcc_assert (MEM_P (operands[0]));
17061 gcc_assert (GET_MODE (operands[1]) != TFmode);
/* SSE3 fisttp truncates without touching the control word.  */
17064 output_asm_insn ("fisttp%Z0\t%0", operands);
/* Classic path: swap in the truncating control word (%3), convert,
   then restore the saved control word (%2).  */
17067 if (round_mode != I387_CW_ANY)
17068 output_asm_insn ("fldcw\t%3", operands);
17069 if (stack_top_dies || dimode_p)
17070 output_asm_insn ("fistp%Z0\t%0", operands);
17072 output_asm_insn ("fist%Z0\t%0", operands);
17073 if (round_mode != I387_CW_ANY)
17074 output_asm_insn ("fldcw\t%2", operands);
17080 /* Output code for x87 ffreep insn. The OPNO argument, which may only
17081 have the values zero or one, indicates the ffreep insn's operand
17082 from the OPERANDS array. */
17084 static const char *
17085 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
17087 if (TARGET_USE_FFREEP)
17088 #ifdef HAVE_AS_IX86_FFREEP
17089 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
/* Assembler lacks ffreep: emit its raw encoding (0xdf 0xc0+reg) via
   .word, computed from the stack register number.  */
17092 static char retval[32];
17093 int regno = REGNO (operands[opno]);
17095 gcc_assert (STACK_REGNO_P (regno));
17097 regno -= FIRST_STACK_REG;
17099 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
/* Fallback when ffreep is not desirable: plain fstp.  */
17104 return opno ? "fstp\t%y1" : "fstp\t%y0";
17108 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
17109 should be used. UNORDERED_P is true when fucom should be used. */
/* NOTE(review): non-contiguous extract — several branch/brace lines
   between the visible statements are missing.  */
17112 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
17114 int stack_top_dies;
17115 rtx cmp_op0, cmp_op1;
17116 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
17120 cmp_op0 = operands[0];
17121 cmp_op1 = operands[1];
17125 cmp_op0 = operands[1];
17126 cmp_op1 = operands[2];
/* SSE path: [v]ucomiss/[v]comiss or the sd variants by mode.  */
17131 if (GET_MODE (operands[0]) == SFmode)
17133 return "%vucomiss\t{%1, %0|%0, %1}";
17135 return "%vcomiss\t{%1, %0|%0, %1}";
17138 return "%vucomisd\t{%1, %0|%0, %1}";
17140 return "%vcomisd\t{%1, %0|%0, %1}";
17143 gcc_assert (STACK_TOP_P (cmp_op0));
17145 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Comparison against zero uses ftst; pop st(0) if it dies.  */
17147 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
17149 if (stack_top_dies)
17151 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
17152 return output_387_ffreep (operands, 1);
17155 return "ftst\n\tfnstsw\t%0";
17158 if (STACK_REG_P (cmp_op1)
17160 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
17161 && REGNO (cmp_op1) != FIRST_STACK_REG)
17163 /* If both the top of the 387 stack dies, and the other operand
17164 is also a stack register that dies, then this must be a
17165 `fcompp' float compare */
17169 /* There is no double popping fcomi variant. Fortunately,
17170 eflags is immune from the fstp's cc clobbering. */
17172 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
17174 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
17175 return output_387_ffreep (operands, 0);
17180 return "fucompp\n\tfnstsw\t%0";
17182 return "fcompp\n\tfnstsw\t%0";
17187 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
17189 static const char * const alt[16] =
17191 "fcom%Z2\t%y2\n\tfnstsw\t%0",
17192 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
17193 "fucom%Z2\t%y2\n\tfnstsw\t%0",
17194 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
17196 "ficom%Z2\t%y2\n\tfnstsw\t%0",
17197 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
17201 "fcomi\t{%y1, %0|%0, %y1}",
17202 "fcomip\t{%y1, %0|%0, %y1}",
17203 "fucomi\t{%y1, %0|%0, %y1}",
17204 "fucomip\t{%y1, %0|%0, %y1}",
/* Index into ALT built from the four flags documented above.  */
17215 mask = eflags_p << 3;
17216 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
17217 mask |= unordered_p << 1;
17218 mask |= stack_top_dies;
17220 gcc_assert (mask < 16);
/* Emit one absolute jump-table entry: ".long/.quad L<value>".  */
17229 ix86_output_addr_vec_elt (FILE *file, int value)
17231 const char *directive = ASM_LONG;
17235 directive = ASM_QUAD;
17237 gcc_assert (!TARGET_64BIT);
17240 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one relative jump-table entry: label VALUE expressed relative
   to label REL, @GOTOFF, the Mach-O function base, or the GOT symbol,
   depending on target/PIC configuration.  */
17244 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
17246 const char *directive = ASM_LONG;
17249 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
17250 directive = ASM_QUAD;
17252 gcc_assert (!TARGET_64BIT);
17254 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
17255 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
17256 fprintf (file, "%s%s%d-%s%d\n",
17257 directive, LPREFIX, value, LPREFIX, rel);
17258 else if (HAVE_AS_GOTOFF_IN_DATA)
17259 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
17261 else if (TARGET_MACHO)
17263 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
17264 machopic_output_function_base_name (file);
17269 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
17270 GOT_SYMBOL_NAME, LPREFIX, value);
17273 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* Zero DEST post-reload; the xor form clobbers FLAGS_REG, so it is
   wrapped in a PARALLEL with an explicit CC clobber.  */
17277 ix86_expand_clear (rtx dest)
17281 /* We play register width games, which are only valid after reload. */
17282 gcc_assert (reload_completed);
17284 /* Avoid HImode and its attendant prefix byte. */
17285 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
17286 dest = gen_rtx_REG (SImode, REGNO (dest));
17287 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
17289 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
17291 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17292 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
17298 /* X is an unchanging MEM. If it is a constant pool reference, return
17299 the constant pool rtx, else NULL. */
17302 maybe_get_pool_constant (rtx x)
17304 x = ix86_delegitimize_address (XEXP (x, 0));
17306 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
17307 return get_pool_constant (x);
/* Expand a scalar move operands[0] = operands[1] in MODE, legitimizing
   TLS symbols, PE/COFF dllimport symbols, and PIC references on the
   way, and forcing awkward operands into registers/memory.
   NOTE(review): non-contiguous extract — several enclosing conditions
   and else-arms between the visible lines are missing.  */
17313 ix86_expand_move (machine_mode mode, rtx operands[])
17316 enum tls_model model;
/* Bare SYMBOL_REF source: handle TLS and PE/COFF legitimization.  */
17321 if (GET_CODE (op1) == SYMBOL_REF)
17325 model = SYMBOL_REF_TLS_MODEL (op1);
17328 op1 = legitimize_tls_address (op1, model, true);
17329 op1 = force_operand (op1, op0);
17332 op1 = convert_to_mode (mode, op1, 1);
17334 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
/* (const (plus symbol offset)) source: legitimize the symbol, then
   re-add the offset.  */
17337 else if (GET_CODE (op1) == CONST
17338 && GET_CODE (XEXP (op1, 0)) == PLUS
17339 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
17341 rtx addend = XEXP (XEXP (op1, 0), 1);
17342 rtx symbol = XEXP (XEXP (op1, 0), 0);
17345 model = SYMBOL_REF_TLS_MODEL (symbol);
17347 tmp = legitimize_tls_address (symbol, model, true);
17349 tmp = legitimize_pe_coff_symbol (symbol, true);
17353 tmp = force_operand (tmp, NULL);
17354 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
17355 op0, 1, OPTAB_DIRECT);
17358 op1 = convert_to_mode (mode, tmp, 1);
/* PIC / Mach-O indirect handling for symbolic operands.  */
17362 if ((flag_pic || MACHOPIC_INDIRECT)
17363 && symbolic_operand (op1, mode))
17365 if (TARGET_MACHO && !TARGET_64BIT)
17368 /* dynamic-no-pic */
17369 if (MACHOPIC_INDIRECT)
17371 rtx temp = ((reload_in_progress
17372 || ((op0 && REG_P (op0))
17374 ? op0 : gen_reg_rtx (Pmode));
17375 op1 = machopic_indirect_data_reference (op1, temp);
17377 op1 = machopic_legitimize_pic_address (op1, mode,
17378 temp == op1 ? 0 : temp);
17380 if (op0 != op1 && GET_CODE (op0) != MEM)
17382 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
17386 if (GET_CODE (op0) == MEM)
17387 op1 = force_reg (Pmode, op1);
17391 if (GET_CODE (temp) != REG)
17392 temp = gen_reg_rtx (Pmode);
17393 temp = legitimize_pic_address (op1, temp);
17398 /* dynamic-no-pic */
17404 op1 = force_reg (mode, op1);
17405 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
17407 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
17408 op1 = legitimize_pic_address (op1, reg);
17411 op1 = convert_to_mode (mode, op1, 1);
/* Non-push MEM destinations can't take arbitrary sources.  */
17418 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
17419 || !push_operand (op0, mode))
17421 op1 = force_reg (mode, op1);
17423 if (push_operand (op0, mode)
17424 && ! general_no_elim_operand (op1, mode))
17425 op1 = copy_to_mode_reg (mode, op1);
17427 /* Force large constants in 64bit compilation into register
17428 to get them CSEed. */
17429 if (can_create_pseudo_p ()
17430 && (mode == DImode) && TARGET_64BIT
17431 && immediate_operand (op1, mode)
17432 && !x86_64_zext_immediate_operand (op1, VOIDmode)
17433 && !register_operand (op0, mode)
17435 op1 = copy_to_mode_reg (mode, op1);
17437 if (can_create_pseudo_p ()
17438 && FLOAT_MODE_P (mode)
17439 && GET_CODE (op1) == CONST_DOUBLE)
17441 /* If we are loading a floating point constant to a register,
17442 force the value to memory now, since we'll get better code
17443 out the back end. */
17445 op1 = validize_mem (force_const_mem (mode, op1));
17446 if (!register_operand (op0, mode))
17448 rtx temp = gen_reg_rtx (mode);
17449 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
17450 emit_move_insn (op0, temp);
17456 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expand a vector move operands[0] = operands[1] in MODE: resolve
   pushes, force non-zero constants to the constant pool, and route
   under-aligned SSE operands through the misaligned-move expander.  */
17460 ix86_expand_vector_move (machine_mode mode, rtx operands[])
17462 rtx op0 = operands[0], op1 = operands[1];
17463 unsigned int align = GET_MODE_ALIGNMENT (mode);
17465 if (push_operand (op0, VOIDmode))
17466 op0 = emit_move_resolve_push (mode, op0);
17468 /* Force constants other than zero into memory. We do not know how
17469 the instructions used to build constants modify the upper 64 bits
17470 of the register, once we have that information we may be able
17471 to handle some of them more efficiently. */
17472 if (can_create_pseudo_p ()
17473 && register_operand (op0, mode)
17474 && (CONSTANT_P (op1)
17475 || (GET_CODE (op1) == SUBREG
17476 && CONSTANT_P (SUBREG_REG (op1))))
17477 && !standard_sse_constant_p (op1))
17478 op1 = validize_mem (force_const_mem (mode, op1));
17480 /* We need to check memory alignment for SSE mode since attribute
17481 can make operands unaligned. */
17482 if (can_create_pseudo_p ()
17483 && SSE_REG_MODE_P (mode)
17484 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
17485 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
17489 /* ix86_expand_vector_move_misalign() does not like constants ... */
17490 if (CONSTANT_P (op1)
17491 || (GET_CODE (op1) == SUBREG
17492 && CONSTANT_P (SUBREG_REG (op1))))
17493 op1 = validize_mem (force_const_mem (mode, op1));
17495 /* ... nor both arguments in memory. */
17496 if (!register_operand (op0, mode)
17497 && !register_operand (op1, mode))
17498 op1 = force_reg (mode, op1);
17500 tmp[0] = op0; tmp[1] = op1;
17501 ix86_expand_vector_move_misalign (mode, tmp);
17505 /* Make operand1 a register if it isn't already. */
17506 if (can_create_pseudo_p ()
17507 && !register_operand (op0, mode)
17508 && !register_operand (op1, mode))
17510 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
17514 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17517 /* Split 32-byte AVX unaligned load and store if needed. */
/* Depending on tuning, emits either a single 256-bit unaligned
   vmovdqu/vmovups/vmovupd, or two 128-bit halves (load: two moves plus
   VEC_CONCAT; store: two vextractf128).  */
17520 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17523 rtx (*extract) (rtx, rtx, rtx);
17524 rtx (*load_unaligned) (rtx, rtx);
17525 rtx (*store_unaligned) (rtx, rtx);
17528 switch (GET_MODE (op0))
17531 gcc_unreachable ();
17533 extract = gen_avx_vextractf128v32qi;
17534 load_unaligned = gen_avx_loaddquv32qi;
17535 store_unaligned = gen_avx_storedquv32qi;
17539 extract = gen_avx_vextractf128v8sf;
17540 load_unaligned = gen_avx_loadups256;
17541 store_unaligned = gen_avx_storeups256;
17545 extract = gen_avx_vextractf128v4df;
17546 load_unaligned = gen_avx_loadupd256;
17547 store_unaligned = gen_avx_storeupd256;
/* Load side: split into two 16-byte loads when tuned for it.  */
17554 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
17555 && optimize_insn_for_speed_p ())
17557 rtx r = gen_reg_rtx (mode);
17558 m = adjust_address (op1, mode, 0);
17559 emit_move_insn (r, m);
17560 m = adjust_address (op1, mode, 16);
17561 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17562 emit_move_insn (op0, r);
17564 /* Normal *mov<mode>_internal pattern will handle
17565 unaligned loads just fine if misaligned_operand
17566 is true, and without the UNSPEC it can be combined
17567 with arithmetic instructions. */
17568 else if (misaligned_operand (op1, GET_MODE (op1)))
17569 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17571 emit_insn (load_unaligned (op0, op1));
17573 else if (MEM_P (op0))
/* Store side: split into two vextractf128 stores when tuned for it.  */
17575 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
17576 && optimize_insn_for_speed_p ())
17578 m = adjust_address (op0, mode, 0);
17579 emit_insn (extract (m, op1, const0_rtx));
17580 m = adjust_address (op0, mode, 16);
17581 emit_insn (extract (m, op1, const1_rtx));
17584 emit_insn (store_unaligned (op0, op1));
17587 gcc_unreachable ();
/* NOTE(review): elided extract -- braces, several case labels and the
   function's opening lines between the comment and the body are missing
   from view; comments added here are limited to what is visible.  */
17590 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17591 straight to ix86_expand_vector_move. */
17592 /* Code generation for scalar reg-reg moves of single and double precision data:
17593 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17597 if (x86_sse_partial_reg_dependency == true)
17602 Code generation for scalar loads of double precision data:
17603 if (x86_sse_split_regs == true)
17604 movlpd mem, reg (gas syntax)
17608 Code generation for unaligned packed loads of single precision data
17609 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17610 if (x86_sse_unaligned_move_optimal)
17613 if (x86_sse_partial_reg_dependency == true)
17625 Code generation for unaligned packed loads of double precision data
17626 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17627 if (x86_sse_unaligned_move_optimal)
17630 if (x86_sse_split_regs == true)
17643 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
17645 rtx op0, op1, orig_op0 = NULL_RTX, m;
17646 rtx (*load_unaligned) (rtx, rtx);
17647 rtx (*store_unaligned) (rtx, rtx);
/* 64-byte (AVX-512) vectors: integer modes are canonicalized to V16SI,
   float modes pick per-mode unaligned load/store expanders.  */
17652 if (GET_MODE_SIZE (mode) == 64)
17654 switch (GET_MODE_CLASS (mode))
17656 case MODE_VECTOR_INT:
17658 if (GET_MODE (op0) != V16SImode)
17663 op0 = gen_reg_rtx (V16SImode);
17666 op0 = gen_lowpart (V16SImode, op0);
17668 op1 = gen_lowpart (V16SImode, op1);
17671 case MODE_VECTOR_FLOAT:
17672 switch (GET_MODE (op0))
17675 gcc_unreachable ();
17677 load_unaligned = gen_avx512f_loaddquv16si;
17678 store_unaligned = gen_avx512f_storedquv16si;
17681 load_unaligned = gen_avx512f_loadups512;
17682 store_unaligned = gen_avx512f_storeups512;
17685 load_unaligned = gen_avx512f_loadupd512;
17686 store_unaligned = gen_avx512f_storeupd512;
17691 emit_insn (load_unaligned (op0, op1));
17692 else if (MEM_P (op0))
17693 emit_insn (store_unaligned (op0, op1));
17695 gcc_unreachable ();
17697 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17701 gcc_unreachable ();
/* 32-byte (AVX) vectors: integer modes go through V32QI; float modes are
   delegated to ix86_avx256_split_vector_move_misalign.  */
17708 && GET_MODE_SIZE (mode) == 32)
17710 switch (GET_MODE_CLASS (mode))
17712 case MODE_VECTOR_INT:
17714 if (GET_MODE (op0) != V32QImode)
17719 op0 = gen_reg_rtx (V32QImode);
17722 op0 = gen_lowpart (V32QImode, op0);
17724 op1 = gen_lowpart (V32QImode, op1);
17727 case MODE_VECTOR_FLOAT:
17728 ix86_avx256_split_vector_move_misalign (op0, op1);
17730 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17734 gcc_unreachable ();
/* 16-byte (SSE) path starts here; presumably guarded by an elided
   MEM_P (op1) test for the load direction -- TODO confirm.  */
17742 /* Normal *mov<mode>_internal pattern will handle
17743 unaligned loads just fine if misaligned_operand
17744 is true, and without the UNSPEC it can be combined
17745 with arithmetic instructions. */
17747 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17748 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17749 && misaligned_operand (op1, GET_MODE (op1)))
17750 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17751 /* ??? If we have typed data, then it would appear that using
17752 movdqu is the only way to get unaligned data loaded with
17754 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17756 if (GET_MODE (op0) != V16QImode)
17759 op0 = gen_reg_rtx (V16QImode);
17761 op1 = gen_lowpart (V16QImode, op1);
17762 /* We will eventually emit movups based on insn attributes. */
17763 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17765 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17767 else if (TARGET_SSE2 && mode == V2DFmode)
17772 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17773 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17774 || optimize_insn_for_size_p ())
17776 /* We will eventually emit movups based on insn attributes. */
17777 emit_insn (gen_sse2_loadupd (op0, op1));
17781 /* When SSE registers are split into halves, we can avoid
17782 writing to the top half twice. */
17783 if (TARGET_SSE_SPLIT_REGS)
17785 emit_clobber (op0);
17790 /* ??? Not sure about the best option for the Intel chips.
17791 The following would seem to satisfy; the register is
17792 entirely cleared, breaking the dependency chain. We
17793 then store to the upper half, with a dependency depth
17794 of one. A rumor has it that Intel recommends two movsd
17795 followed by an unpacklpd, but this is unconfirmed. And
17796 given that the dependency depth of the unpacklpd would
17797 still be one, I'm not sure why this would be better. */
17798 zero = CONST0_RTX (V2DFmode);
/* V2DF split load: low then high 8-byte half via loadlpd/loadhpd.  */
17801 m = adjust_address (op1, DFmode, 0);
17802 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17803 m = adjust_address (op1, DFmode, 8);
17804 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17811 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17812 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17813 || optimize_insn_for_size_p ())
17815 if (GET_MODE (op0) != V4SFmode)
17818 op0 = gen_reg_rtx (V4SFmode);
17820 op1 = gen_lowpart (V4SFmode, op1);
17821 emit_insn (gen_sse_loadups (op0, op1));
17823 emit_move_insn (orig_op0,
17824 gen_lowpart (GET_MODE (orig_op0), op0));
/* V4SF split load: optionally zero first to break the partial-register
   dependency, then load low/high 8-byte halves.  */
17828 if (mode != V4SFmode)
17829 t = gen_reg_rtx (V4SFmode);
17833 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17834 emit_move_insn (t, CONST0_RTX (V4SFmode));
17838 m = adjust_address (op1, V2SFmode, 0);
17839 emit_insn (gen_sse_loadlps (t, t, m));
17840 m = adjust_address (op1, V2SFmode, 8);
17841 emit_insn (gen_sse_loadhps (t, t, m));
17842 if (mode != V4SFmode)
17843 emit_move_insn (op0, gen_lowpart (mode, t));
/* Store direction: destination is memory.  */
17846 else if (MEM_P (op0))
17848 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17850 op0 = gen_lowpart (V16QImode, op0);
17851 op1 = gen_lowpart (V16QImode, op1);
17852 /* We will eventually emit movups based on insn attributes. */
17853 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17855 else if (TARGET_SSE2 && mode == V2DFmode)
17858 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17859 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17860 || optimize_insn_for_size_p ())
17861 /* We will eventually emit movups based on insn attributes. */
17862 emit_insn (gen_sse2_storeupd (op0, op1));
17865 m = adjust_address (op0, DFmode, 0);
17866 emit_insn (gen_sse2_storelpd (m, op1));
17867 m = adjust_address (op0, DFmode, 8);
17868 emit_insn (gen_sse2_storehpd (m, op1));
17873 if (mode != V4SFmode)
17874 op1 = gen_lowpart (V4SFmode, op1);
17877 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17878 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17879 || optimize_insn_for_size_p ())
17881 op0 = gen_lowpart (V4SFmode, op0);
17882 emit_insn (gen_sse_storeups (op0, op1));
17886 m = adjust_address (op0, V2SFmode, 0);
17887 emit_insn (gen_sse_storelps (m, op1));
17888 m = adjust_address (op0, V2SFmode, 8);
17889 emit_insn (gen_sse_storehps (m, op1));
17894 gcc_unreachable ();
/* NOTE(review): elided extract -- the return statements between the
   priority tests are missing from view.  */
17897 /* Helper function of ix86_fixup_binary_operands to canonicalize
17898 operand order. Returns true if the operands should be swapped. */
17901 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
17904 rtx dst = operands[0];
17905 rtx src1 = operands[1];
17906 rtx src2 = operands[2];
17908 /* If the operation is not commutative, we can't do anything. */
17909 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17912 /* Highest priority is that src1 should match dst. */
17913 if (rtx_equal_p (dst, src1))
17915 if (rtx_equal_p (dst, src2))
17918 /* Next highest priority is that immediate constants come second. */
17919 if (immediate_operand (src2, mode))
17921 if (immediate_operand (src1, mode))
17924 /* Lowest priority is that memory references should come second. */
/* NOTE(review): elided extract; braces and one condition line of the
   "improve address combine" test are missing from view.  */
17934 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17935 destination to use for the operation. If different from the true
17936 destination in operands[0], a copy operation will be required. */
17939 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
17942 rtx dst = operands[0];
17943 rtx src1 = operands[1];
17944 rtx src2 = operands[2];
17946 /* Canonicalize operand order. */
17947 if (ix86_swap_binary_operands_p (code, mode, operands))
17949 /* It is invalid to swap operands of different modes. */
17950 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17952 std::swap (src1, src2);
17955 /* Both source operands cannot be in memory. */
17956 if (MEM_P (src1) && MEM_P (src2))
17958 /* Optimization: Only read from memory once. */
17959 if (rtx_equal_p (src1, src2))
17961 src2 = force_reg (mode, src2);
17964 else if (rtx_equal_p (dst, src1))
17965 src2 = force_reg (mode, src2);
17967 src1 = force_reg (mode, src1);
17970 /* If the destination is memory, and we do not have matching source
17971 operands, do things in registers. */
17972 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17973 dst = gen_reg_rtx (mode);
17975 /* Source 1 cannot be a constant. */
17976 if (CONSTANT_P (src1))
17977 src1 = force_reg (mode, src1);
17979 /* Source 1 cannot be a non-matching memory. */
17980 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17981 src1 = force_reg (mode, src1);
17983 /* Improve address combine. */
17985 && GET_MODE_CLASS (mode) == MODE_INT
17987 src2 = force_reg (mode, src2);
/* Write the (possibly forced-to-register) sources back; DST is returned
   to the caller, which emits a copy if it differs from operands[0].  */
17989 operands[1] = src1;
17990 operands[2] = src2;
17994 /* Similarly, but assume that the destination has already been
17995 set up properly. */
17998 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17999 machine_mode mode, rtx operands[])
/* The assert enforces the "no copy needed" contract: the fixup must not
   have substituted a fresh destination register.  */
18001 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
18002 gcc_assert (dst == operands[0]);
/* NOTE(review): elided extract -- the reload_completed condition is
   missing one line and several braces are out of view.  */
18005 /* Attempt to expand a binary operator. Make the expansion closer to the
18006 actual machine, then just general_operand, which will allow 3 separate
18007 memory references (one output, two input) in a single insn. */
18010 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
18013 rtx src1, src2, dst, op, clob;
18015 dst = ix86_fixup_binary_operands (code, mode, operands);
18016 src1 = operands[1];
18017 src2 = operands[2];
18019 /* Emit the instruction. */
18021 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
18022 if (reload_in_progress)
18024 /* Reload doesn't know about the flags register, and doesn't know that
18025 it doesn't want to clobber it. We can only do this with PLUS. */
18026 gcc_assert (code == PLUS);
18029 else if (reload_completed
18031 && !rtx_equal_p (dst, src1))
18033 /* This is going to be an LEA; avoid splitting it later. */
/* Default case: emit the operation in PARALLEL with a FLAGS_REG clobber,
   since most x86 ALU instructions set the condition codes.  */
18038 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18039 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18042 /* Fix up the destination if needed. */
18043 if (dst != operands[0])
18044 emit_move_insn (operands[0], dst);
/* NOTE(review): elided extract -- the SUBREG-extraction statements after
   the first two if tests and several case labels are missing from view.  */
18047 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
18048 the given OPERANDS. */
18051 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
18054 rtx op1 = NULL_RTX, op2 = NULL_RTX;
18055 if (GET_CODE (operands[1]) == SUBREG)
18060 else if (GET_CODE (operands[2]) == SUBREG)
18065 /* Optimize (__m128i) d | (__m128i) e and similar code
18066 when d and e are float vectors into float vector logical
18067 insn. In C/C++ without using intrinsics there is no other way
18068 to express vector logical operation on float vectors than
18069 to cast them temporarily to integer vectors. */
18071 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
18072 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
18073 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
18074 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
18075 && SUBREG_BYTE (op1) == 0
18076 && (GET_CODE (op2) == CONST_VECTOR
18077 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
18078 && SUBREG_BYTE (op2) == 0))
18079 && can_create_pseudo_p ())
18082 switch (GET_MODE (SUBREG_REG (op1)))
/* Perform the logical op directly in the float-vector mode of the
   SUBREG operands, then copy the lowpart back to operands[0].  */
18090 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
18091 if (GET_CODE (op2) == CONST_VECTOR)
18093 op2 = gen_lowpart (GET_MODE (dst), op2);
18094 op2 = force_reg (GET_MODE (dst), op2);
18099 op2 = SUBREG_REG (operands[2]);
18100 if (!nonimmediate_operand (op2, GET_MODE (dst)))
18101 op2 = force_reg (GET_MODE (dst), op2);
18103 op1 = SUBREG_REG (op1);
18104 if (!nonimmediate_operand (op1, GET_MODE (dst)))
18105 op1 = force_reg (GET_MODE (dst), op1);
18106 emit_insn (gen_rtx_SET (VOIDmode, dst,
18107 gen_rtx_fmt_ee (code, GET_MODE (dst),
18109 emit_move_insn (operands[0], gen_lowpart (mode, dst));
/* Fallback: legitimize both operands and emit the op in MODE itself.  */
18115 if (!nonimmediate_operand (operands[1], mode))
18116 operands[1] = force_reg (mode, operands[1]);
18117 if (!nonimmediate_operand (operands[2], mode))
18118 operands[2] = force_reg (mode, operands[2]);
18119 ix86_fixup_binary_operands_no_copy (code, mode, operands);
18120 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18121 gen_rtx_fmt_ee (code, mode, operands[1],
/* NOTE(review): elided extract -- the early "return false" lines after
   each constraint test are missing from view.  */
18125 /* Return TRUE or FALSE depending on whether the binary operator meets the
18126 appropriate constraints. */
18129 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
18132 rtx dst = operands[0];
18133 rtx src1 = operands[1];
18134 rtx src2 = operands[2];
18136 /* Both source operands cannot be in memory. */
18137 if (MEM_P (src1) && MEM_P (src2))
18140 /* Canonicalize operand order for commutative operators. */
18141 if (ix86_swap_binary_operands_p (code, mode, operands))
18142 std::swap (src1, src2);
18144 /* If the destination is memory, we must have a matching source operand. */
18145 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18148 /* Source 1 cannot be a constant. */
18149 if (CONSTANT_P (src1))
18152 /* Source 1 cannot be a non-matching memory. */
18153 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18154 /* Support "andhi/andsi/anddi" as a zero-extending move. */
18155 return (code == AND
18158 || (TARGET_64BIT && mode == DImode))
18159 && satisfies_constraint_L (src2));
/* NOTE(review): elided extract -- the src/dst initialization from
   operands[] and several braces are missing from view.  */
18164 /* Attempt to expand a unary operator. Make the expansion closer to the
18165 actual machine, then just general_operand, which will allow 2 separate
18166 memory references (one output, one input) in a single insn. */
18169 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
18172 bool matching_memory = false;
18173 rtx src, dst, op, clob;
18178 /* If the destination is memory, and we do not have matching source
18179 operands, do things in registers. */
18182 if (rtx_equal_p (dst, src))
18183 matching_memory = true;
18185 dst = gen_reg_rtx (mode);
18188 /* When source operand is memory, destination must match. */
18189 if (MEM_P (src) && !matching_memory)
18190 src = force_reg (mode, src);
18192 /* Emit the instruction. */
18194 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
18195 if (reload_in_progress || code == NOT)
18197 /* Reload doesn't know about the flags register, and doesn't know that
18198 it doesn't want to clobber it. */
18199 gcc_assert (code == NOT);
/* NOT is the one x86 unary op that leaves flags alone, so no clobber is
   attached on that path; otherwise PARALLEL with a FLAGS_REG clobber.  */
18204 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18205 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18208 /* Fix up the destination if needed. */
18209 if (dst != operands[0])
18210 emit_move_insn (operands[0], dst);
/* NOTE(review): elided extract -- the switch head on the mode, the
   signed_p parameter declaration and various braces are out of view.  */
18213 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
18214 divisor are within the range [0-255]. */
18217 ix86_split_idivmod (machine_mode mode, rtx operands[],
18220 rtx_code_label *end_label, *qimode_label;
18221 rtx insn, div, mod;
18222 rtx scratch, tmp0, tmp1, tmp2;
18223 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
18224 rtx (*gen_zero_extend) (rtx, rtx);
18225 rtx (*gen_test_ccno_1) (rtx, rtx);
/* Choose SImode or DImode generator functions for the full divide.  */
18230 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
18231 gen_test_ccno_1 = gen_testsi_ccno_1;
18232 gen_zero_extend = gen_zero_extendqisi2;
18235 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
18236 gen_test_ccno_1 = gen_testdi_ccno_1;
18237 gen_zero_extend = gen_zero_extendqidi2;
18240 gcc_unreachable ();
18243 end_label = gen_label_rtx ();
18244 qimode_label = gen_label_rtx ();
18246 scratch = gen_reg_rtx (mode);
18248 /* Use 8bit unsigned divimod if dividend and divisor are within
18249 the range [0-255]. */
/* OR the two inputs and test against ~0xff: if no high bits are set in
   either, jump to the fast 8-bit path.  */
18250 emit_move_insn (scratch, operands[2]);
18251 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
18252 scratch, 1, OPTAB_DIRECT);
18253 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
18254 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
18255 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
18256 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
18257 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
18259 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
18260 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18261 JUMP_LABEL (insn) = qimode_label;
18263 /* Generate original signed/unsigned divimod. */
18264 div = gen_divmod4_1 (operands[0], operands[1],
18265 operands[2], operands[3]);
18268 /* Branch to the end. */
18269 emit_jump_insn (gen_jump (end_label));
18272 /* Generate 8bit unsigned divide. */
18273 emit_label (qimode_label);
18274 /* Don't use operands[0] for result of 8bit divide since not all
18275 registers support QImode ZERO_EXTRACT. */
18276 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
18277 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
18278 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
18279 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
/* Build REG_EQUAL notes describing the intended quotient/remainder.  */
18283 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
18284 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
18288 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
18289 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
18292 /* Extract remainder from AH. */
18293 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
18294 if (REG_P (operands[1]))
18295 insn = emit_move_insn (operands[1], tmp1);
18298 /* Need a new scratch register since the old one has result
18300 scratch = gen_reg_rtx (mode);
18301 emit_move_insn (scratch, tmp1);
18302 insn = emit_move_insn (operands[1], scratch);
18304 set_unique_reg_note (insn, REG_EQUAL, mod);
18306 /* Zero extend quotient from AL. */
18307 tmp1 = gen_lowpart (QImode, tmp0);
18308 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
18309 set_unique_reg_note (insn, REG_EQUAL, div);
18311 emit_label (end_label);
/* Tuning knobs for the LEA/AGU-stall heuristics below.  */
18314 #define LEA_MAX_STALL (3)
18315 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
18317 /* Increase given DISTANCE in half-cycles according to
18318 dependencies between PREV and NEXT instructions.
18319 Add 1 half-cycle if there is no dependency and
18320 go to next cycle if there is some dependency. */
18322 static unsigned int
18323 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
/* (distance & 1) rounds DISTANCE up to an even half-cycle boundary,
   i.e. "go to next full cycle", then adds one full cycle (+2).  */
18327 if (!prev || !next)
18328 return distance + (distance & 1) + 2;
18330 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
18331 return distance + 1;
/* A register defined by PREV and used by NEXT forces a new cycle.  */
18333 FOR_EACH_INSN_USE (use, next)
18334 FOR_EACH_INSN_DEF (def, prev)
18335 if (!DF_REF_IS_ARTIFICIAL (def)
18336 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
18337 return distance + (distance & 1) + 2;
18339 return distance + 1;
/* NOTE(review): elided extract -- the return statements are out of view.  */
18342 /* Function checks if instruction INSN defines register number
18343 REGNO1 or REGNO2. */
18346 insn_defines_reg (unsigned int regno1, unsigned int regno2,
/* Walk the dataflow defs of INSN, skipping artificial refs.  */
18351 FOR_EACH_INSN_DEF (def, insn)
18352 if (DF_REF_REG_DEF_P (def)
18353 && !DF_REF_IS_ARTIFICIAL (def)
18354 && (regno1 == DF_REF_REGNO (def)
18355 || regno2 == DF_REF_REGNO (def)))
/* NOTE(review): elided extract -- the return statements are out of view.  */
18361 /* Function checks if instruction INSN uses register number
18362 REGNO as a part of address expression. */
18365 insn_uses_reg_mem (unsigned int regno, rtx insn)
/* DF_REF_REG_MEM_P is true for uses inside a memory address.  */
18369 FOR_EACH_INSN_USE (use, insn)
18370 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
/* NOTE(review): elided extract -- loop head, *found assignments and
   break/return statements are missing from view.  */
18376 /* Search backward for non-agu definition of register number REGNO1
18377 or register number REGNO2 in basic block starting from instruction
18378 START up to head of basic block or instruction INSN.
18380 Function puts true value into *FOUND var if definition was found
18381 and false otherwise.
18383 Distance in half-cycles between START and found instruction or head
18384 of BB is added to DISTANCE and returned. */
18387 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
18388 rtx_insn *insn, int distance,
18389 rtx_insn *start, bool *found)
18391 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
18392 rtx_insn *prev = start;
18393 rtx_insn *next = NULL;
18399 && distance < LEA_SEARCH_THRESHOLD)
18401 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
18403 distance = increase_distance (prev, next, distance);
18404 if (insn_defines_reg (regno1, regno2, prev))
/* A defining insn that is not an LEA counts as a non-AGU definition;
   get_attr_type requires a successful recog first.  */
18406 if (recog_memoized (prev) < 0
18407 || get_attr_type (prev) != TYPE_LEA)
18416 if (prev == BB_HEAD (bb))
18419 prev = PREV_INSN (prev);
/* NOTE(review): elided extract -- the distance initialization, edge-test
   bodies and several braces are missing from view.  */
18425 /* Search backward for non-agu definition of register number REGNO1
18426 or register number REGNO2 in INSN's basic block until
18427 1. Pass LEA_SEARCH_THRESHOLD instructions, or
18428 2. Reach neighbour BBs boundary, or
18429 3. Reach agu definition.
18430 Returns the distance between the non-agu definition point and INSN.
18431 If no definition point, returns -1. */
18434 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
18437 basic_block bb = BLOCK_FOR_INSN (insn);
18439 bool found = false;
/* First scan backward within INSN's own basic block.  */
18441 if (insn != BB_HEAD (bb))
18442 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
18443 distance, PREV_INSN (insn),
/* Not found locally: continue into predecessor blocks.  A self-loop
   ("simple_loop") is scanned once more from its own BB_END; otherwise
   the shortest distance over all predecessors is used.  */
18446 if (!found && distance < LEA_SEARCH_THRESHOLD)
18450 bool simple_loop = false;
18452 FOR_EACH_EDGE (e, ei, bb->preds)
18455 simple_loop = true;
18460 distance = distance_non_agu_define_in_bb (regno1, regno2,
18462 BB_END (bb), &found);
18465 int shortest_dist = -1;
18466 bool found_in_bb = false;
18468 FOR_EACH_EDGE (e, ei, bb->preds)
18471 = distance_non_agu_define_in_bb (regno1, regno2,
18477 if (shortest_dist < 0)
18478 shortest_dist = bb_dist;
18479 else if (bb_dist > 0)
18480 shortest_dist = MIN (bb_dist, shortest_dist);
18486 distance = shortest_dist;
18490 /* get_attr_type may modify recog data. We want to make sure
18491 that recog data is valid for instruction INSN, on which
18492 distance_non_agu_define is called. INSN is unchanged here. */
18493 extract_insn_cached (insn);
/* Internal distances are half-cycles; callers get whole cycles.  */
18498 return distance >> 1;
/* NOTE(review): elided extract -- loop head, *found/*redefined stores and
   return statements are missing from view.  */
18501 /* Return the distance in half-cycles between INSN and the next
18502 insn that uses register number REGNO in memory address added
18503 to DISTANCE. Return -1 if REGNO0 is set.
18505 Put true value into *FOUND if register usage was found and
18507 Put true value into *REDEFINED if register redefinition was
18508 found and false otherwise. */
18511 distance_agu_use_in_bb (unsigned int regno,
18512 rtx_insn *insn, int distance, rtx_insn *start,
18513 bool *found, bool *redefined)
18515 basic_block bb = NULL;
18516 rtx_insn *next = start;
18517 rtx_insn *prev = NULL;
18520 *redefined = false;
18522 if (start != NULL_RTX)
18524 bb = BLOCK_FOR_INSN (start);
18525 if (start != BB_HEAD (bb))
18526 /* If insn and start belong to the same bb, set prev to insn,
18527 so the call to increase_distance will increase the distance
18528 between insns by 1. */
/* Forward scan mirror of distance_non_agu_define_in_bb.  */
18534 && distance < LEA_SEARCH_THRESHOLD)
18536 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18538 distance = increase_distance(prev, next, distance);
18539 if (insn_uses_reg_mem (regno, next))
18541 /* Return DISTANCE if OP0 is used in memory
18542 address in NEXT. */
18547 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18549 /* Return -1 if OP0 is set in NEXT. */
18557 if (next == BB_END (bb))
18560 next = NEXT_INSN (next);
/* NOTE(review): elided extract -- the distance initialization, edge-test
   bodies and the -1 return are missing from view.  Structure mirrors
   distance_non_agu_define but scans forward over bb->succs.  */
18566 /* Return the distance between INSN and the next insn that uses
18567 register number REGNO0 in memory address. Return -1 if no such
18568 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
18571 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18573 basic_block bb = BLOCK_FOR_INSN (insn);
18575 bool found = false;
18576 bool redefined = false;
18578 if (insn != BB_END (bb))
18579 distance = distance_agu_use_in_bb (regno0, insn, distance,
18581 &found, &redefined);
18583 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18587 bool simple_loop = false;
18589 FOR_EACH_EDGE (e, ei, bb->succs)
18592 simple_loop = true;
18597 distance = distance_agu_use_in_bb (regno0, insn,
18598 distance, BB_HEAD (bb),
18599 &found, &redefined);
18602 int shortest_dist = -1;
18603 bool found_in_bb = false;
18604 bool redefined_in_bb = false;
18606 FOR_EACH_EDGE (e, ei, bb->succs)
18609 = distance_agu_use_in_bb (regno0, insn,
18610 distance, BB_HEAD (e->dest),
18611 &found_in_bb, &redefined_in_bb);
18614 if (shortest_dist < 0)
18615 shortest_dist = bb_dist;
18616 else if (bb_dist > 0)
18617 shortest_dist = MIN (bb_dist, shortest_dist);
18623 distance = shortest_dist;
18627 if (!found || redefined)
/* Convert half-cycles to whole cycles for the caller.  */
18630 return distance >> 1;
/* NOTE(review): elided extract -- the return statements inside the
   Silvermont block and elsewhere are missing from view.  */
18633 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18634 there is a dilemma of choosing LEA or ADD
18635 Negative value: ADD is more preferred than LEA
18637 Positive value: LEA is more preferred than ADD */
18638 #define IX86_LEA_PRIORITY 0
18640 /* Return true if usage of lea INSN has performance advantage
18641 over a sequence of instructions. Instructions sequence has
18642 SPLIT_COST cycles higher latency than lea latency. */
18645 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18646 unsigned int regno2, int split_cost, bool has_scale)
18648 int dist_define, dist_use;
18650 /* For Silvermont if using a 2-source or 3-source LEA for
18651 non-destructive destination purposes, or due to wanting
18652 ability to use SCALE, the use of LEA is justified. */
18653 if (TARGET_SILVERMONT || TARGET_INTEL)
18657 if (split_cost < 1)
18659 if (regno0 == regno1 || regno0 == regno2)
/* General case: weigh the backward distance to a non-AGU definition
   against the forward distance to an AGU use.  */
18664 dist_define = distance_non_agu_define (regno1, regno2, insn);
18665 dist_use = distance_agu_use (regno0, insn);
18667 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18669 /* If there is no non AGU operand definition, no AGU
18670 operand usage and split cost is 0 then both lea
18671 and non lea variants have same priority. Currently
18672 we prefer lea for 64 bit code and non lea on 32 bit
18674 if (dist_use < 0 && split_cost == 0)
18675 return TARGET_64BIT || IX86_LEA_PRIORITY;
18680 /* With longer definitions distance lea is more preferable.
18681 Here we change it to take into account splitting cost and
18683 dist_define += split_cost + IX86_LEA_PRIORITY;
18685 /* If there is no use in memory address then we just check
18686 that split cost exceeds AGU stall. */
18688 return dist_define > LEA_MAX_STALL;
18690 /* If this insn has both backward non-agu dependence and forward
18691 agu dependence, the one with short distance takes effect. */
18692 return dist_define >= dist_use;
/* NOTE(review): elided extract -- loop head, early returns and the
   'live' declaration are missing from view.  */
18695 /* Return true if it is legal to clobber flags by INSN and
18696 false otherwise. */
18699 ix86_ok_to_clobber_flags (rtx_insn *insn)
18701 basic_block bb = BLOCK_FOR_INSN (insn);
/* Scan forward to BB end: flags are clobberable unless some later insn
   reads FLAGS_REG before any insn redefines it.  */
18707 if (NONDEBUG_INSN_P (insn))
18709 FOR_EACH_INSN_USE (use, insn)
18710 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18713 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18717 if (insn == BB_END (bb))
18720 insn = NEXT_INSN (insn);
/* Reached BB end without a verdict: flags must not be live-out.  */
18723 live = df_get_live_out(bb);
18724 return !REGNO_REG_SET_P (live, FLAGS_REG);
/* NOTE(review): elided extract -- early "return false" lines after the
   guard tests are missing from view.  */
18727 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18728 move and add to avoid AGU stalls. */
18731 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18733 unsigned int regno0, regno1, regno2;
18735 /* Check if we need to optimize. */
18736 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18739 /* Check it is correct to split here. */
18740 if (!ix86_ok_to_clobber_flags(insn))
18743 regno0 = true_regnum (operands[0]);
18744 regno1 = true_regnum (operands[1]);
18745 regno2 = true_regnum (operands[2]);
18747 /* We need to split only adds with non destructive
18748 destination operand. */
18749 if (regno0 == regno1 || regno0 == regno2)
/* Split cost of 1: the extra mov needed for the non-destructive form.  */
18752 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
/* NOTE(review): elided extract -- the tail of the leading comment and
   early returns are missing from view.  */
18755 /* Return true if we should emit lea instruction instead of mov
18759 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18761 unsigned int regno0, regno1;
18763 /* Check if we need to optimize. */
18764 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18767 /* Use lea for reg to reg moves only. */
18768 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18771 regno0 = true_regnum (operands[0]);
18772 regno1 = true_regnum (operands[1]);
/* Zero split cost: mov and lea have the same length here.  */
18774 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
/* NOTE(review): elided extract -- split_cost declaration/initialization,
   several increments and the final has_scale argument are out of view.  */
18777 /* Return true if we need to split lea into a sequence of
18778 instructions to avoid AGU stalls. */
18781 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18783 unsigned int regno0, regno1, regno2;
18785 struct ix86_address parts;
18788 /* Check we need to optimize. */
18789 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18792 /* The "at least two components" test below might not catch simple
18793 move or zero extension insns if parts.base is non-NULL and parts.disp
18794 is const0_rtx as the only components in the address, e.g. if the
18795 register is %rbp or %r13. As this test is much cheaper and moves or
18796 zero extensions are the common case, do this check first. */
18797 if (REG_P (operands[1])
18798 || (SImode_address_operand (operands[1], VOIDmode)
18799 && REG_P (XEXP (operands[1], 0))))
18802 /* Check if it is OK to split here. */
18803 if (!ix86_ok_to_clobber_flags (insn))
18806 ok = ix86_decompose_address (operands[1], &parts);
18809 /* There should be at least two components in the address. */
18810 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18811 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18814 /* We should not split into add if non legitimate pic
18815 operand is used as displacement. */
18816 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18819 regno0 = true_regnum (operands[0]) ;
18820 regno1 = INVALID_REGNUM;
18821 regno2 = INVALID_REGNUM;
18824 regno1 = true_regnum (parts.base);
18826 regno2 = true_regnum (parts.index);
18830 /* Compute how many cycles we will add to execution time
18831 if split lea into a sequence of instructions. */
18832 if (parts.base || parts.index)
18834 /* Have to use mov instruction if non destructive
18835 destination form is used. */
18836 if (regno1 != regno0 && regno2 != regno0)
18839 /* Have to add index to base if both exist. */
18840 if (parts.base && parts.index)
18843 /* Have to use shift and adds if scale is 2 or greater. */
18844 if (parts.scale > 1)
18846 if (regno0 != regno1)
18848 else if (regno2 == regno0)
18851 split_cost += parts.scale;
18854 /* Have to use add instruction with immediate if
18855 disp is non zero. */
18856 if (parts.disp && parts.disp != const0_rtx)
18859 /* Subtract the price of lea. */
18863 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18867 /* Emit x86 binary operand CODE in mode MODE, where the first operand
18868 matches destination. RTX includes clobber of FLAGS_REG. */
18871 ix86_emit_binop (enum rtx_code code, machine_mode mode,
/* Emit "dst = dst CODE src" in PARALLEL with a flags clobber, matching
   the two-operand x86 ALU instruction form.  */
18876 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18877 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18879 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
/* NOTE(review): elided extract -- the true/false returns after each
   insn_defines_reg hit are missing from view.  */
18882 /* Return true if regno1 def is nearest to the insn. */
18885 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18887 rtx_insn *prev = insn;
18888 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
/* Walk backward from INSN to the BB head; the first defining insn
   encountered decides which register is "nearest".  */
18892 while (prev && prev != start)
18894 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18896 prev = PREV_INSN (prev);
18899 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18901 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18903 prev = PREV_INSN (prev);
18906 /* None of the regs is defined in the bb. */
18910 /* Split lea instructions into a sequence of instructions
18911 which are executed on ALU to avoid AGU stalls.
18912 It is assumed that it is allowed to clobber flags register
18913 at lea position. */
18916 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
18918 unsigned int regno0, regno1, regno2;
18919 struct ix86_address parts;
/* Break the lea address operand into base/index/scale/disp parts.  */
18923 ok = ix86_decompose_address (operands[1], &parts);
18926 target = gen_lowpart (mode, operands[0]);
/* regno1/regno2 stay INVALID_REGNUM when base/index are absent.  */
18928 regno0 = true_regnum (target)
18929 regno1 = INVALID_REGNUM;
18930 regno2 = INVALID_REGNUM;
/* Narrow each present address part to MODE so emitted ALU ops match
   the destination width.  */
18934 parts.base = gen_lowpart (mode, parts.base);
18935 regno1 = true_regnum (parts.base);
18940 parts.index = gen_lowpart (mode, parts.index);
18941 regno2 = true_regnum (parts.index);
18945 parts.disp = gen_lowpart (mode, parts.disp);
18947 if (parts.scale > 1)
18949 /* Case r1 = r1 + ... */
18950 if (regno1 == regno0)
18952 /* If we have a case r1 = r1 + C * r2 then we
18953 should use multiplication which is very
18954 expensive. Assume cost model is wrong if we
18955 have such case here. */
18956 gcc_assert (regno2 != regno0);
/* Scale by repeated addition: scale extra adds of the index.  */
18958 for (adds = parts.scale; adds > 0; adds--)
18959 ix86_emit_binop (PLUS, mode, target, parts.index);
18963 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18964 if (regno0 != regno2)
18965 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18967 /* Use shift for scaling. */
/* NOTE(review): relies on scale being a power of two here --
   exact_log2 would return -1 otherwise; presumably guaranteed by
   address decomposition.  */
18968 ix86_emit_binop (ASHIFT, mode, target,
18969 GEN_INT (exact_log2 (parts.scale)));
18972 ix86_emit_binop (PLUS, mode, target, parts.base);
18974 if (parts.disp && parts.disp != const0_rtx)
18975 ix86_emit_binop (PLUS, mode, target, parts.disp);
/* No base and no index: lea was just a constant load.  */
18978 else if (!parts.base && !parts.index)
18980 gcc_assert(parts.disp);
18981 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18987 if (regno0 != regno2)
18988 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
/* Base only, no index.  */
18990 else if (!parts.index)
18992 if (regno0 != regno1)
18993 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
/* Both base and index present; destination may alias either.  */
18997 if (regno0 == regno1)
18999 else if (regno0 == regno2)
19005 /* Find better operand for SET instruction, depending
19006 on which definition is farther from the insn. */
19007 if (find_nearest_reg_def (insn, regno1, regno2))
19008 tmp = parts.index, tmp1 = parts.base;
19010 tmp = parts.base, tmp1 = parts.index;
19012 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19014 if (parts.disp && parts.disp != const0_rtx)
19015 ix86_emit_binop (PLUS, mode, target, parts.disp);
19017 ix86_emit_binop (PLUS, mode, target, tmp1);
19021 ix86_emit_binop (PLUS, mode, target, tmp);
19024 if (parts.disp && parts.disp != const0_rtx)
19025 ix86_emit_binop (PLUS, mode, target, parts.disp);
19029 /* Return true if it is ok to optimize an ADD operation to LEA
19030 operation to avoid flag register consumation. For most processors,
19031 ADD is faster than LEA. For the processors like BONNELL, if the
19032 destination register of LEA holds an actual address which will be
19033 used soon, LEA is better and otherwise ADD is better. */
19036 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
19038 unsigned int regno0 = true_regnum (operands[0]);
19039 unsigned int regno1 = true_regnum (operands[1]);
19040 unsigned int regno2 = true_regnum (operands[2]);
19042 /* If a = b + c, (a!=b && a!=c), must use lea form. */
19043 if (regno0 != regno1 && regno0 != regno2)
/* Without the AGU-avoidance tuning (or when optimizing for size),
   keep the ADD form.  */
19046 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
/* Otherwise let the lea cost model decide (split_cost 0 here).  */
19049 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
19052 /* Return true if destination reg of SET_BODY is shift count of
19056 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
19062 /* Retrieve destination of SET_BODY. */
19063 switch (GET_CODE (set_body))
19066 set_dest = SET_DEST (set_body);
/* Only register destinations can feed a shift count.  */
19067 if (!set_dest || !REG_P (set_dest))
/* PARALLEL: recurse over each element looking for a match.  */
19071 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
19072 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
19080 /* Retrieve shift count of USE_BODY. */
19081 switch (GET_CODE (use_body))
19084 shift_rtx = XEXP (use_body, 1);
19087 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
19088 if (ix86_dep_by_shift_count_body (set_body,
19089 XVECEXP (use_body, 0, i)))
/* Accept any shift/rotate rtx; its second operand is the count.  */
19097 && (GET_CODE (shift_rtx) == ASHIFT
19098 || GET_CODE (shift_rtx) == LSHIFTRT
19099 || GET_CODE (shift_rtx) == ASHIFTRT
19100 || GET_CODE (shift_rtx) == ROTATE
19101 || GET_CODE (shift_rtx) == ROTATERT))
19103 rtx shift_count = XEXP (shift_rtx, 1);
19105 /* Return true if shift count is dest of SET_BODY. */
19106 if (REG_P (shift_count))
19108 /* Add check since it can be invoked before register
19109 allocation in pre-reload schedule. */
19110 if (reload_completed
19111 && true_regnum (set_dest) == true_regnum (shift_count))
/* Pre-reload: compare raw (pseudo) register numbers instead.  */
19113 else if (REGNO(set_dest) == REGNO(shift_count))
19121 /* Return true if destination reg of SET_INSN is shift count of
/* Thin wrapper: unwrap the insn patterns and delegate to the
   body-level check above.  */
19125 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
19127 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
19128 PATTERN (use_insn));
19131 /* Return TRUE or FALSE depending on whether the unary operator meets the
19132 appropriate constraints. */
19135 ix86_unary_operator_ok (enum rtx_code,
19139 /* If one of operands is memory, source and destination must match. */
/* x86 unary ops are single-operand read-modify-write, so a memory
   operand must be both source and destination.  */
19140 if ((MEM_P (operands[0])
19141 || MEM_P (operands[1]))
19142 && ! rtx_equal_p (operands[0], operands[1]))
19147 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
19148 are ok, keeping in mind the possible movddup alternative. */
19151 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
/* Memory destination: must match the source element selected by
   HIGH (operand 2 for high, operand 1 for low).  */
19153 if (MEM_P (operands[0]))
19154 return rtx_equal_p (operands[0], operands[1 + high]);
/* Two memory sources only work via movddup (same address, SSE3).  */
19155 if (MEM_P (operands[1]) && MEM_P (operands[2]))
19156 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
19160 /* Post-reload splitter for converting an SF or DFmode value in an
19161 SSE register into an unsigned SImode. */
19164 ix86_split_convert_uns_si_sse (rtx operands[])
19166 machine_mode vecmode;
19167 rtx value, large, zero_or_two31, input, two31, x;
19169 large = operands[1];
19170 zero_or_two31 = operands[2];
19171 input = operands[3];
19172 two31 = operands[4];
19173 vecmode = GET_MODE (large);
/* Reuse operand 0's hard register as a vector-mode value.  */
19174 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
19176 /* Load up the value into the low element. We must ensure that the other
19177 elements are valid floats -- zero is the easiest such value. */
19180 if (vecmode == V4SFmode)
19181 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
19183 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
/* Input already in an SSE reg: zero VALUE then merge the low element
   with movss/movsd.  */
19187 input = gen_rtx_REG (vecmode, REGNO (input));
19188 emit_move_insn (value, CONST0_RTX (vecmode));
19189 if (vecmode == V4SFmode)
19190 emit_insn (gen_sse_movss (value, value, input));
19192 emit_insn (gen_sse2_movsd (value, value, input));
/* large = 2**31; zero_or_two31 = copy of it (via LARGE if TWO31 is
   in memory, to avoid a second load).  */
19195 emit_move_insn (large, two31);
19196 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
/* large = all-ones mask where 2**31 <= value.  */
19198 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
19199 emit_insn (gen_rtx_SET (VOIDmode, large, x));
/* zero_or_two31 = 2**31 where value was large, else 0.  */
19201 x = gen_rtx_AND (vecmode, zero_or_two31, large);
19202 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
/* Bias large values down into signed range before truncation.  */
19204 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
19205 emit_insn (gen_rtx_SET (VOIDmode, value, x));
/* Turn the mask into 0x80000000 per lane for the final fixup.  */
19207 large = gen_rtx_REG (V4SImode, REGNO (large));
19208 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
/* Signed truncation of the (possibly biased) value.  */
19210 x = gen_rtx_REG (V4SImode, REGNO (value));
19211 if (vecmode == V4SFmode)
19212 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
19214 emit_insn (gen_sse2_cvttpd2dq (x, value));
/* XOR the sign bit back in where we subtracted 2**31.  */
19217 emit_insn (gen_xorv4si3 (value, value, large));
19220 /* Convert an unsigned DImode value into a DFmode, using only SSE.
19221 Expects the 64-bit DImode to be supplied in a pair of integral
19222 registers. Requires SSE2; will use SSE3 if available. For x86_32,
19223 -mfpmath=sse, !optimize_size only. */
19226 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
19228 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
19229 rtx int_xmm, fp_xmm;
19230 rtx biases, exponents;
/* Get the 64-bit input into the low half of an XMM register, by
   whichever path the tuning allows.  */
19233 int_xmm = gen_reg_rtx (V4SImode);
19234 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
19235 emit_insn (gen_movdi_to_sse (int_xmm, input));
19236 else if (TARGET_SSE_SPLIT_REGS)
/* Clobber first so the half-register store doesn't create a
   partial-register dependence.  */
19238 emit_clobber (int_xmm);
19239 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
19243 x = gen_reg_rtx (V2DImode);
19244 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
19245 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
/* Constant vector of the two DF exponent words (0x1.0p52, 0x1.0p84)
   to be interleaved above the input halves.  */
19248 x = gen_rtx_CONST_VECTOR (V4SImode,
19249 gen_rtvec (4, GEN_INT (0x43300000UL),
19250 GEN_INT (0x45300000UL),
19251 const0_rtx, const0_rtx));
19252 exponents = validize_mem (force_const_mem (V4SImode, x));
19254 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
19255 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents))
19257 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
19258 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
19259 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
19260 (0x1.0p84 + double(fp_value_hi_xmm)).
19261 Note these exponents differ by 32. */
19263 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
19265 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
19266 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
19267 real_ldexp (&bias_lo_rvt, &dconst1, 52);
19268 real_ldexp (&bias_hi_rvt, &dconst1, 84);
19269 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
19270 x = const_double_from_real_value (bias_hi_rvt, DFmode);
19271 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
19272 biases = validize_mem (force_const_mem (V2DFmode, biases));
19273 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
19275 /* Add the upper and lower DFmode values together. */
/* SSE3 horizontal add does it in one insn; otherwise shuffle the
   high half down and add.  */
19277 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
19280 x = copy_to_mode_reg (V2DFmode, fp_xmm);
19281 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
19282 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
/* Extract the scalar DF result into TARGET.  */
19285 ix86_expand_vector_extract (false, target, fp_xmm, 0);
19288 /* Not used, but eases macroization of patterns. */
/* Stub: XFmode has no SSE conversion path; must never be reached.  */
19290 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
19292 gcc_unreachable ();
19295 /* Convert an unsigned SImode value into a DFmode. Only currently used
19296 for SSE, but applicable anywhere. */
19299 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
19301 REAL_VALUE_TYPE TWO31r;
/* x = input - 2**31 (as signed wraparound), mapping [0,2**32) onto
   the signed range so floatsidf2 can be used.  */
19304 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
19305 NULL, 1, OPTAB_DIRECT);
19307 fp = gen_reg_rtx (DFmode);
19308 emit_insn (gen_floatsidf2 (fp, x));
/* Undo the bias in the FP domain: result = fp + 2**31, exact in DF.  */
19310 real_ldexp (&TWO31r, &dconst1, 31);
19311 x = const_double_from_real_value (TWO31r, DFmode);
19313 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
19315 emit_move_insn (target, x);
19318 /* Convert a signed DImode value into a DFmode. Only used for SSE in
19319 32-bit mode; otherwise we have a direct convert instruction. */
19322 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
19324 REAL_VALUE_TYPE TWO32r;
19325 rtx fp_lo, fp_hi, x;
19327 fp_lo = gen_reg_rtx (DFmode);
19328 fp_hi = gen_reg_rtx (DFmode);
/* High word converts signed; it carries the sign of the whole value.  */
19330 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
/* Scale the high word by 2**32 to restore its weight.  */
19332 real_ldexp (&TWO32r, &dconst1, 32);
19333 x = const_double_from_real_value (TWO32r, DFmode);
19334 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
/* Low word converts unsigned (it is a plain 32-bit magnitude).  */
19336 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
19338 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
19341 emit_move_insn (target, x);
19344 /* Convert an unsigned SImode value into a SFmode, using only SSE.
19345 For x86_32, -mfpmath=sse, !optimize_size only. */
19347 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
19349 REAL_VALUE_TYPE ONE16r;
19350 rtx fp_hi, fp_lo, int_hi, int_lo, x;
/* Split the 32-bit input into two 16-bit halves; each half converts
   exactly to SF, avoiding the unsigned-range problem.  */
19352 real_ldexp (&ONE16r, &dconst1, 16);
19353 x = const_double_from_real_value (ONE16r, SFmode);
19354 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
19355 NULL, 0, OPTAB_DIRECT);
19356 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
19357 NULL, 0, OPTAB_DIRECT);
19358 fp_hi = gen_reg_rtx (SFmode);
19359 fp_lo = gen_reg_rtx (SFmode);
19360 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
19361 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
/* Recombine: result = fp_hi * 2**16 + fp_lo.  */
19362 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
19364 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
/* expand_simple_binop may not have used TARGET directly.  */
19366 if (!rtx_equal_p (target, fp_hi))
19367 emit_move_insn (target, fp_hi);
19370 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
19371 a vector of unsigned ints VAL to vector of floats TARGET. */
19374 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
19377 REAL_VALUE_TYPE TWO16r;
19378 machine_mode intmode = GET_MODE (val);
19379 machine_mode fltmode = GET_MODE (target);
19380 rtx (*cvt) (rtx, rtx);
/* Pick the signed vector int->float pattern for the width in use.  */
19382 if (intmode == V4SImode)
19383 cvt = gen_floatv4siv4sf2;
19385 cvt = gen_floatv8siv8sf2;
/* Same 16/16 split as the scalar unsigned SI->SF path: each half is
   exactly representable and non-negative, so signed cvt is safe.  */
19386 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
19387 tmp[0] = force_reg (intmode, tmp[0]);
19388 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
19390 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
19391 NULL_RTX, 1, OPTAB_DIRECT);
19392 tmp[3] = gen_reg_rtx (fltmode);
19393 emit_insn (cvt (tmp[3], tmp[1]));
19394 tmp[4] = gen_reg_rtx (fltmode);
19395 emit_insn (cvt (tmp[4], tmp[2]));
19396 real_ldexp (&TWO16r, &dconst1, 16);
19397 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
19398 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
/* Recombine per lane: target = hi * 2**16 + lo.  */
19399 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
19401 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
19403 if (tmp[7] != target)
19404 emit_move_insn (target, tmp[7]);
19407 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
19408 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
19409 This is done by doing just signed conversion if < 0x1p31, and otherwise by
19410 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
19413 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
19415 REAL_VALUE_TYPE TWO31r;
19416 rtx two31r, tmp[4];
19417 machine_mode mode = GET_MODE (val);
19418 machine_mode scalarmode = GET_MODE_INNER (mode);
/* Matching integer vector mode: 32-byte vector -> V8SI, else V4SI.  */
19419 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
19420 rtx (*cmp) (rtx, rtx, rtx, rtx);
19423 for (i = 0; i < 3; i++)
19424 tmp[i] = gen_reg_rtx (mode);
/* Broadcast the 0x1p31 threshold across the vector.  */
19425 real_ldexp (&TWO31r, &dconst1, 31);
19426 two31r = const_double_from_real_value (TWO31r, scalarmode);
19427 two31r = ix86_build_const_vector (mode, 1, two31r);
19428 two31r = force_reg (mode, two31r);
/* Pick the mask-compare pattern for the element/width in use.  */
19431 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
19432 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
19433 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
19434 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
19435 default: gcc_unreachable ();
/* tmp[0] = all-ones mask where 0x1p31 <= val.  */
19437 tmp[3] = gen_rtx_LE (mode, two31r, val);
19438 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
/* tmp[1] = 0x1p31 in lanes that need the bias, else 0.  */
19439 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
/* *XORP = 0x80000000 per biased lane: derive it from the mask either
   by shifting (when a full-width vector shift exists) or by ANDing
   with a constant.  */
19441 if (intmode == V4SImode || TARGET_AVX2)
19442 *xorp = expand_simple_binop (intmode, ASHIFT,
19443 gen_lowpart (intmode, tmp[0]),
19444 GEN_INT (31), NULL_RTX, 0,
19448 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
19449 two31 = ix86_build_const_vector (intmode, 1, two31);
19450 *xorp = expand_simple_binop (intmode, AND,
19451 gen_lowpart (intmode, tmp[0]),
19452 two31, NULL_RTX, 0,
/* Return val with 0x1p31 subtracted from out-of-range lanes.  */
19455 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
19459 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
19460 then replicate the value for all elements of the vector
19464 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
19468 machine_mode scalar_mode;
19491 n_elt = GET_MODE_NUNITS (mode);
19492 v = rtvec_alloc (n_elt);
19493 scalar_mode = GET_MODE_INNER (mode);
/* Element 0 always gets VALUE; remaining elements get VALUE when
   VECT, otherwise zero.  */
19495 RTVEC_ELT (v, 0) = value;
19497 for (i = 1; i < n_elt; ++i)
19498 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
19500 return gen_rtx_CONST_VECTOR (mode, v);
/* Unsupported vector mode.  */
19503 gcc_unreachable ();
19507 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
19508 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
19509 for an SSE register. If VECT is true, then replicate the mask for
19510 all elements of the vector register. If INVERT is true, then create
19511 a mask excluding the sign bit. */
19514 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
19516 machine_mode vec_mode, imode;
19517 HOST_WIDE_INT hi, lo;
19522 /* Find the sign bit, sign extended to 2*HWI. */
/* SF case: sign bit is bit 31.  */
19532 mode = GET_MODE_INNER (mode);
19534 lo = 0x80000000, hi = lo < 0;
/* DF case: sign bit is bit 63; representation depends on whether a
   HOST_WIDE_INT holds 64 bits.  */
19544 mode = GET_MODE_INNER (mode);
19546 if (HOST_BITS_PER_WIDE_INT >= 64)
19547 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
19549 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* TF/XF-like case: no vector mode; mask is built as a scalar.
   NOTE(review): surrounding case labels are not visible in this
   listing -- confirm against the full source.  */
19554 vec_mode = VOIDmode;
19555 if (HOST_BITS_PER_WIDE_INT >= 64)
19558 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
19565 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* INVERT: complement to get an everything-but-the-sign-bit mask.  */
19569 lo = ~lo, hi = ~hi;
19575 mask = immed_double_const (lo, hi, imode);
19577 vec = gen_rtvec (2, v, mask);
19578 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19579 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19586 gcc_unreachable ();
19590 lo = ~lo, hi = ~hi;
19592 /* Force this value into the low part of a fp vector constant. */
19593 mask = immed_double_const (lo, hi, imode);
19594 mask = gen_lowpart (mode, mask);
/* Scalar (non-vector) modes return the mask in a scalar register.  */
19596 if (vec_mode == VOIDmode)
19597 return force_reg (mode, mask);
19599 v = ix86_build_const_vector (vec_mode, vect, mask);
19600 return force_reg (vec_mode, v);
19603 /* Generate code for floating point ABS or NEG. */
19606 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
19609 rtx mask, set, dst, src;
19610 bool use_sse = false;
19611 bool vector_mode = VECTOR_MODE_P (mode);
19612 machine_mode vmode = mode;
/* Decide whether the operation runs on SSE and which vector mode
   carries the mask.  */
19616 else if (mode == TFmode)
19618 else if (TARGET_SSE_MATH)
19620 use_sse = SSE_FLOAT_MODE_P (mode);
19621 if (mode == SFmode)
19623 else if (mode == DFmode)
19627 /* NEG and ABS performed with SSE use bitwise mask operations.
19628 Create the appropriate mask now. */
/* ABS clears the sign bit (inverted mask, AND); NEG flips it
   (sign-bit mask, XOR).  */
19630 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19637 set = gen_rtx_fmt_e (code, mode, src);
19638 set = gen_rtx_SET (VOIDmode, dst, set);
/* Attach a USE of the mask so it stays live for the splitter.  */
19645 use = gen_rtx_USE (VOIDmode, mask);
19647 par = gen_rtvec (2, set, use);
/* Scalar x87/integer path additionally clobbers the flags.  */
19650 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19651 par = gen_rtvec (3, set, use, clob);
19653 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19659 /* Expand a copysign operation. Special case operand 0 being a constant. */
19662 ix86_expand_copysign (rtx operands[])
19664 machine_mode mode, vmode;
19665 rtx dest, op0, op1, mask, nmask;
19667 dest = operands[0];
19671 mode = GET_MODE (dest);
/* Choose the vector mode used for the bit-mask operations.  */
19673 if (mode == SFmode)
19675 else if (mode == DFmode)
/* Constant magnitude: only one mask is needed.  */
19680 if (GET_CODE (op0) == CONST_DOUBLE)
19682 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
/* Strip the sign from the constant; the sign comes from op1.  */
19684 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19685 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19687 if (mode == SFmode || mode == DFmode)
19689 if (op0 == CONST0_RTX (mode))
19690 op0 = CONST0_RTX (vmode)
/* Wrap the scalar constant in a vector constant for SSE use.  */
19693 rtx v = ix86_build_const_vector (vmode, false, op0);
19695 op0 = force_reg (vmode, v);
19698 else if (op0 != CONST0_RTX (mode))
19699 op0 = force_reg (mode, op0);
19701 mask = ix86_build_signbit_mask (vmode, 0, 0);
19703 if (mode == SFmode)
19704 copysign_insn = gen_copysignsf3_const;
19705 else if (mode == DFmode)
19706 copysign_insn = gen_copysigndf3_const;
19708 copysign_insn = gen_copysigntf3_const;
19710 emit_insn (copysign_insn (dest, op0, op1, mask));
/* Variable magnitude: need both the sign mask and its complement.  */
19714 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19716 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19717 mask = ix86_build_signbit_mask (vmode, 0, 0);
19719 if (mode == SFmode)
19720 copysign_insn = gen_copysignsf3_var;
19721 else if (mode == DFmode)
19722 copysign_insn = gen_copysigndf3_var;
19724 copysign_insn = gen_copysigntf3_var;
19726 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19730 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19731 be a constant, and so has already been expanded into a vector constant. */
19734 ix86_split_copysign_const (rtx operands[])
19736 machine_mode mode, vmode;
19737 rtx dest, op0, mask, x;
19739 dest = operands[0];
19741 mask = operands[3];
19743 mode = GET_MODE (dest);
19744 vmode = GET_MODE (mask);
/* Work on DEST reinterpreted in the mask's vector mode.  */
19746 dest = simplify_gen_subreg (vmode, dest, mode, 0);
/* dest &= sign-mask: keep only the sign bit of the sign source.  */
19747 x = gen_rtx_AND (vmode, dest, mask);
19748 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* OR in the (sign-stripped) constant magnitude, unless it is zero.  */
19750 if (op0 != CONST0_RTX (vmode))
19752 x = gen_rtx_IOR (vmode, dest, op0);
19753 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19757 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19758 so we have to do two masks. */
19761 ix86_split_copysign_var (rtx operands[])
19763 machine_mode mode, vmode;
19764 rtx dest, scratch, op0, op1, mask, nmask, x;
19766 dest = operands[0];
19767 scratch = operands[1];
19770 nmask = operands[4];
19771 mask = operands[5];
19773 mode = GET_MODE (dest);
19774 vmode = GET_MODE (mask);
19776 if (rtx_equal_p (op0, op1))
19778 /* Shouldn't happen often (it's useless, obviously), but when it does
19779 we'd generate incorrect code if we continue below. */
19780 emit_move_insn (dest, op0);
/* The alternatives below correspond to the insn pattern's register
   tying constraints; each asserts the tie it relies on.  */
19784 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19786 gcc_assert (REGNO (op1) == REGNO (scratch));
/* scratch = op1 & sign-mask (extract the sign bit).  */
19788 x = gen_rtx_AND (vmode, scratch, mask);
19789 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
/* dest = ~mask & op0 (magnitude without its sign bit).  */
19792 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19793 x = gen_rtx_NOT (vmode, dest);
19794 x = gen_rtx_AND (vmode, x, op0);
19795 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19799 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19801 x = gen_rtx_AND (vmode, scratch, mask);
19803 else /* alternative 2,4 */
19805 gcc_assert (REGNO (mask) == REGNO (scratch));
19806 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19807 x = gen_rtx_AND (vmode, scratch, op1);
19809 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19811 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19813 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19814 x = gen_rtx_AND (vmode, dest, nmask);
19816 else /* alternative 3,4 */
19818 gcc_assert (REGNO (nmask) == REGNO (dest));
19820 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19821 x = gen_rtx_AND (vmode, dest, op0);
19823 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Combine sign (in scratch) with magnitude (in dest).  */
19826 x = gen_rtx_IOR (vmode, dest, scratch);
19827 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19830 /* Return TRUE or FALSE depending on whether the first SET in INSN
19831 has source and destination with matching CC modes, and that the
19832 CC mode is at least as constrained as REQ_MODE. */
19835 ix86_match_ccmode (rtx insn, machine_mode req_mode)
19838 machine_mode set_mode;
/* Look at the first SET of the (possibly PARALLEL) pattern; it must
   be a COMPARE whose destination is a CC register.  */
19840 set = PATTERN (insn);
19841 if (GET_CODE (set) == PARALLEL)
19842 set = XVECEXP (set, 0, 0);
19843 gcc_assert (GET_CODE (set) == SET);
19844 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19846 set_mode = GET_MODE (SET_DEST (set));
/* The cases below check which requested modes each actual CC mode
   can satisfy (less-constrained modes accept more-constrained
   requests).  */
19850 if (req_mode != CCNOmode
19851 && (req_mode != CCmode
19852 || XEXP (SET_SRC (set), 1) != const0_rtx))
19856 if (req_mode == CCGCmode)
19860 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19864 if (req_mode == CCZmode)
/* Default: the modes must match exactly.  */
19874 if (set_mode != req_mode)
19879 gcc_unreachable ();
/* Finally the COMPARE itself must carry the same CC mode.  */
19882 return GET_MODE (SET_SRC (set)) == set_mode;
19885 /* Generate insn patterns to do an integer compare of OPERANDS. */
19888 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19890 machine_mode cmpmode;
/* Pick the least-constrained CC mode that still captures what CODE
   needs, then emit flags = compare (op0, op1).  */
19893 cmpmode = SELECT_CC_MODE (code, op0, op1);
19894 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19896 /* This is very simple, but making the interface the same as in the
19897 FP case makes the rest of the code easier. */
19898 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19899 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19901 /* Return the test that should be put into the flags user, i.e.
19902 the bcc, scc, or cmov instruction. */
19903 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19906 /* Figure out whether to use ordered or unordered fp comparisons.
19907 Return the appropriate mode to use. */
19910 ix86_fp_compare_mode (enum rtx_code)
19912 /* ??? In order to make all comparisons reversible, we do all comparisons
19913 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19914 all forms trapping and nontrapping comparisons, we can make inequality
19915 comparisons trapping again, since it results in better code when using
19916 FCOM based compares. */
/* CCFPUmode = unordered (non-trapping on NaN); CCFPmode = ordered.  */
19917 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the CC mode for comparing OP0 and OP1 with CODE: choose the
   least-constrained flags subset the comparison actually reads, so
   more compare results can be reused.  */
19921 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19923 machine_mode mode = GET_MODE (op0);
19925 if (SCALAR_FLOAT_MODE_P (mode))
19927 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19928 return ix86_fp_compare_mode (code);
19933 /* Only zero flag is needed. */
19934 case EQ: /* ZF=0 */
19935 case NE: /* ZF!=0 */
19937 /* Codes needing carry flag. */
19938 case GEU: /* CF=0 */
19939 case LTU: /* CF=1 */
19940 /* Detect overflow checks. They need just the carry flag. */
/* x + y < x (unsigned) is an add-overflow test.  */
19941 if (GET_CODE (op0) == PLUS
19942 && rtx_equal_p (op1, XEXP (op0, 0)))
19946 case GTU: /* CF=0 & ZF=0 */
19947 case LEU: /* CF=1 | ZF=1 */
19949 /* Codes possibly doable only with sign flag when
19950 comparing against zero. */
19951 case GE: /* SF=OF or SF=0 */
19952 case LT: /* SF<>OF or SF=1 */
19953 if (op1 == const0_rtx)
19956 /* For other cases Carry flag is not required. */
19958 /* Codes doable only with sign flag when comparing
19959 against zero, but we miss jump instruction for it
19960 so we need to use relational tests against overflow
19961 that thus needs to be zero. */
19962 case GT: /* ZF=0 & SF=OF */
19963 case LE: /* ZF=1 | SF<>OF */
19964 if (op1 == const0_rtx)
19968 /* strcmp pattern do (use flags) and combine may ask us for proper
/* Unknown comparison code.  */
19973 gcc_unreachable ();
19977 /* Return the fixed registers used for condition codes. */
19980 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19987 /* If two condition code modes are compatible, return a condition code
19988 mode which is compatible with both. Otherwise, return
19991 static machine_mode
19992 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
/* Identical modes are trivially compatible (handled before here --
   lines elided in this listing).  */
19997 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
/* CCGC and CCGOC merge to their common subset.  */
20000 if ((m1 == CCGCmode && m2 == CCGOCmode)
20001 || (m1 == CCGOCmode && m2 == CCGCmode))
/* CCZ (zero flag only) is a subset of CCGC/CCGOC.  */
20004 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
20006 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
20012 gcc_unreachable ();
20042 /* These are only compatible with themselves, which we already
20049 /* Return a comparison we can do and that it is equivalent to
20050 swap_condition (code) apart possibly from orderedness.
20051 But, never change orderedness if TARGET_IEEE_FP, returning
20052 UNKNOWN in that case if necessary. */
20054 static enum rtx_code
20055 ix86_fp_swap_condition (enum rtx_code code)
/* The four codes below would change ordered <-> unordered when
   swapped; allow that only when not honoring IEEE NaN semantics.  */
20059 case GT: /* GTU - CF=0 & ZF=0 */
20060 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
20061 case GE: /* GEU - CF=0 */
20062 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
20063 case UNLT: /* LTU - CF=1 */
20064 return TARGET_IEEE_FP ? UNKNOWN : GT;
20065 case UNLE: /* LEU - CF=1 | ZF=1 */
20066 return TARGET_IEEE_FP ? UNKNOWN : GE;
/* All other codes swap without changing orderedness.  */
20068 return swap_condition (code);
20072 /* Return cost of comparison CODE using the best strategy for performance.
20073 All following functions do use number of instructions as a cost metrics.
20074 In future this should be tweaked to compute bytes for optimize_size and
20075 take into account performance of various instructions on various CPUs. */
20078 ix86_fp_comparison_cost (enum rtx_code code)
20082 /* The cost of code using bit-twiddling on %ah. */
/* IEEE-conforming compares need extra NaN handling, hence +1/+2.  */
20099 arith_cost = TARGET_IEEE_FP ? 5 : 4;
20103 arith_cost = TARGET_IEEE_FP ? 6 : 4;
20106 gcc_unreachable ();
/* Map the chosen strategy to an instruction-count cost, capped by
   the arithmetic (%ah bit-twiddling) fallback.  */
20109 switch (ix86_fp_comparison_strategy (code))
20111 case IX86_FPCMP_COMI:
20112 return arith_cost > 4 ? 3 : 2;
20113 case IX86_FPCMP_SAHF:
20114 return arith_cost > 4 ? 4 : 3;
20120 /* Return strategy to use for floating-point. We assume that fcomi is always
20121 preferrable where available, since that is also true when looking at size
20122 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
20124 enum ix86_fpcmp_strategy
20125 ix86_fp_comparison_strategy (enum rtx_code)
20127 /* Do fcomi/sahf based test when profitable. */
20130 return IX86_FPCMP_COMI;
20132 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
20133 return IX86_FPCMP_SAHF;
/* Fall back to fnstsw + bit-twiddling on %ah.  */
20135 return IX86_FPCMP_ARITH;
20138 /* Swap, force into registers, or otherwise massage the two operands
20139 to a fp comparison. The operands are updated in place; the new
20140 comparison code is returned. */
20142 static enum rtx_code
20143 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
20145 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
20146 rtx op0 = *pop0, op1 = *pop1;
20147 machine_mode op_mode = GET_MODE (op0);
20148 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
20150 /* All of the unordered compare instructions only work on registers.
20151 The same is true of the fcomi compare instructions. The XFmode
20152 compare instructions require registers except when comparing
20153 against zero or when converting operand 1 from fixed point to
/* Force both operands into registers in the cases described above.  */
20157 && (fpcmp_mode == CCFPUmode
20158 || (op_mode == XFmode
20159 && ! (standard_80387_constant_p (op0) == 1
20160 || standard_80387_constant_p (op1) == 1)
20161 && GET_CODE (op1) != FLOAT)
20162 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
20164 op0 = force_reg (op_mode, op0);
20165 op1 = force_reg (op_mode, op1);
20169 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
20170 things around if they appear profitable, otherwise force op0
20171 into a register. */
20173 if (standard_80387_constant_p (op0) == 0
20175 && ! (standard_80387_constant_p (op1) == 0
/* Swap is only legal if an orderedness-preserving swapped code
   exists (see ix86_fp_swap_condition).  */
20178 enum rtx_code new_code = ix86_fp_swap_condition (code);
20179 if (new_code != UNKNOWN)
20181 std::swap (op0, op1);
20187 op0 = force_reg (op_mode, op0);
20189 if (CONSTANT_P (op1))
20191 int tmp = standard_80387_constant_p (op1);
/* Non-standard x87 constant: place it in the constant pool.  */
20193 op1 = validize_mem (force_const_mem (op_mode, op1));
20197 op1 = force_reg (op_mode, op1);
20200 op1 = force_reg (op_mode, op1);
20204 /* Try to rearrange the comparison to make it cheaper. */
20205 if (ix86_fp_comparison_cost (code)
20206 > ix86_fp_comparison_cost (swap_condition (code))
20207 && (REG_P (op1) || can_create_pseudo_p ()))
20209 std::swap (op0, op1);
20210 code = swap_condition (code);
20212 op0 = force_reg (op_mode, op0);
20220 /* Convert comparison codes we use to represent FP comparison to integer
20221 code that will result in proper branch. Return UNKNOWN if no such code
20225 ix86_fp_compare_code_to_integer (enum rtx_code code)
20254 /* Generate insn patterns to do a floating point compare of OPERANDS. */
/* NOTE(review): this extraction dropped interior lines (braces, fall-through
   statements, some operands) — do not treat the block below as complete.
   Visible contract: emit the RTL for an x87/SSE FP compare of OP0/OP1 under
   CODE, using fcomi, fnstsw+sahf, or fnstsw+bit-twiddling depending on
   ix86_fp_comparison_strategy, and return the flags-user test rtx.  */
20257 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
20259 machine_mode fpcmp_mode, intcmp_mode;
20262 fpcmp_mode = ix86_fp_compare_mode (code);
20263 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
20265 /* Do fcomi/sahf based test when profitable. */
20266 switch (ix86_fp_comparison_strategy (code))
/* COMI/SAHF strategies: the comparison result lands directly in FLAGS_REG,
   so the integer compare mode equals the FP compare mode.  */
20268 case IX86_FPCMP_COMI:
20269 intcmp_mode = fpcmp_mode;
20270 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20271 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20276 case IX86_FPCMP_SAHF:
20277 intcmp_mode = fpcmp_mode;
20278 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20279 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20283 scratch = gen_reg_rtx (HImode);
20284 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
20285 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
20288 case IX86_FPCMP_ARITH:
20289 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
20290 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20291 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
20293 scratch = gen_reg_rtx (HImode)
20294 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
20296 /* In the unordered case, we have to check C2 for NaN's, which
20297 doesn't happen to work out to anything nice combination-wise.
20298 So do some bit twiddling on the value we've got in AH to come
20299 up with an appropriate set of condition codes. */
20301 intcmp_mode = CCNOmode;
/* Below: per-CODE masking of the FPU status word (C0=0x01, C2=0x04,
   C3=0x40; 0x45 selects C0|C2|C3).  The dispatch structure (presumably a
   switch on CODE — elided here) chooses test/and/add/cmp sequences; the
   TARGET_IEEE_FP variants additionally distinguish unordered results.  */
20306 if (code == GT || !TARGET_IEEE_FP)
20308 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20313 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20314 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20315 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
20316 intcmp_mode = CCmode;
20322 if (code == LT && TARGET_IEEE_FP)
20324 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20325 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
20326 intcmp_mode = CCmode;
20331 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
20337 if (code == GE || !TARGET_IEEE_FP)
20339 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
20344 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20345 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
20351 if (code == LE && TARGET_IEEE_FP)
20353 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20354 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20355 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20356 intcmp_mode = CCmode;
20361 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20367 if (code == EQ && TARGET_IEEE_FP)
20369 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20370 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20371 intcmp_mode = CCmode;
20376 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20382 if (code == NE && TARGET_IEEE_FP)
20384 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20385 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
20391 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20397 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20401 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20406 gcc_unreachable ();
20414 /* Return the test that should be put into the flags user, i.e.
20415 the bcc, scc, or cmov instruction. */
20416 return gen_rtx_fmt_ee (code, VOIDmode,
20417 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand a comparison of OP0/OP1 under CODE and return the rtx that tests
   the result (suitable for a branch, setcc, or cmov).  Dispatches on the
   mode of OP0: an existing CC-mode value is used directly, scalar floats go
   through ix86_expand_fp_compare, everything else through
   ix86_expand_int_compare.  (Return-type line elided by extraction —
   presumably `static rtx`.)  */
20422 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
20426 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
20427 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
20429 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
/* Decimal float is expected to have been lowered before reaching here.  */
20431 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
20432 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20435 ret = ix86_expand_int_compare (code, op0, op1);
/* Emit a conditional branch to LABEL for CODE applied to OP0/OP1.
   Double-word (DImode on 32-bit / TImode on 64-bit — exact dispatch elided)
   comparisons are split into word-sized compare+branch sequences.
   NOTE(review): interior lines (switch/case framing, some declarations such
   as lo[]/hi[]/tmp) were dropped by the extraction.  */
20441 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
20443 machine_mode mode = GET_MODE (op0);
/* Simple case: one compare, one jump.  */
20455 tmp = ix86_expand_compare (code, op0, op1);
20456 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20457 gen_rtx_LABEL_REF (VOIDmode, label),
20459 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20466 /* Expand DImode branch into multiple compare+branch. */
20469 rtx_code_label *label2;
20470 enum rtx_code code1, code2, code3;
20471 machine_mode submode;
/* Canonicalize: keep any constant as the second operand.  */
20473 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
20475 std::swap (op0, op1);
20476 code = swap_condition (code);
20479 split_double_mode (mode, &op0, 1, lo+0, hi+0);
20480 split_double_mode (mode, &op1, 1, lo+1, hi+1);
20482 submode = mode == DImode ? SImode : DImode;
20484 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
20485 avoid two branches. This costs one extra insn, so disable when
20486 optimizing for size. */
20488 if ((code == EQ || code == NE)
20489 && (!optimize_insn_for_size_p ()
20490 || hi[1] == const0_rtx || lo[1] == const0_rtx))
20495 if (hi[1] != const0_rtx)
20496 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
20497 NULL_RTX, 0, OPTAB_WIDEN);
20500 if (lo[1] != const0_rtx)
20501 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
20502 NULL_RTX, 0, OPTAB_WIDEN);
20504 tmp = expand_binop (submode, ior_optab, xor1, xor0,
20505 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse on the single-word OR-of-XORs result.  */
20507 ix86_expand_branch (code, tmp, const0_rtx, label);
20511 /* Otherwise, if we are doing less-than or greater-or-equal-than,
20512 op1 is a constant and the low word is zero, then we can just
20513 examine the high word. Similarly for low word -1 and
20514 less-or-equal-than or greater-than. */
20516 if (CONST_INT_P (hi[1]))
20519 case LT: case LTU: case GE: case GEU:
20520 if (lo[1] == const0_rtx)
20522 ix86_expand_branch (code, hi[0], hi[1], label);
20526 case LE: case LEU: case GT: case GTU:
20527 if (lo[1] == constm1_rtx)
20529 ix86_expand_branch (code, hi[0], hi[1], label);
20537 /* Otherwise, we need two or three jumps. */
20539 label2 = gen_label_rtx ();
20542 code2 = swap_condition (code);
20543 code3 = unsigned_condition (code);
/* For the high word, relational codes are decomposed into a strict
   high-word compare pair; the low word is always compared unsigned.  */
20547 case LT: case GT: case LTU: case GTU:
20550 case LE: code1 = LT; code2 = GT; break;
20551 case GE: code1 = GT; code2 = LT; break;
20552 case LEU: code1 = LTU; code2 = GTU; break;
20553 case GEU: code1 = GTU; code2 = LTU; break;
20555 case EQ: code1 = UNKNOWN; code2 = NE; break;
20556 case NE: code2 = UNKNOWN; break;
20559 gcc_unreachable ();
20564 * if (hi(a) < hi(b)) goto true;
20565 * if (hi(a) > hi(b)) goto false;
20566 * if (lo(a) < lo(b)) goto true;
20570 if (code1 != UNKNOWN)
20571 ix86_expand_branch (code1, hi[0], hi[1], label);
20572 if (code2 != UNKNOWN)
20573 ix86_expand_branch (code2, hi[0], hi[1], label2);
20575 ix86_expand_branch (code3, lo[0], lo[1], label);
20577 if (code2 != UNKNOWN)
20578 emit_label (label2);
/* Fallback path (context elided): operand must already be a CC value.  */
20583 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20588 /* Split branch based on floating point condition. */
/* Emit a conditional jump choosing TARGET1/TARGET2 based on an FP compare
   of OP1/OP2.  One of the targets is expected to be pc_rtx; if TARGET2 is
   not, the targets are swapped and the condition reversed (NaN-aware) so
   the fall-through edge is TARGET2.  Attaches a branch-probability note
   when split_branch_probability is known.  */
20590 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20591 rtx target1, rtx target2, rtx tmp)
20596 if (target2 != pc_rtx)
20598 std::swap (target1, target2);
/* reverse_condition_maybe_unordered keeps NaN behavior correct.  */
20599 code = reverse_condition_maybe_unordered (code);
20602 condition = ix86_expand_fp_compare (code, op1, op2,
20605 i = emit_jump_insn (gen_rtx_SET
20607 gen_rtx_IF_THEN_ELSE (VOIDmode,
20608 condition, target1, target2)));
20609 if (split_branch_probability >= 0)
20610 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
/* Expand a setcc: store into QImode DEST the 0/1 result of comparing
   OP0 and OP1 under CODE.  The compare rtx from ix86_expand_compare is
   retargeted to QImode so the SET produces a byte value.  */
20614 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20618 gcc_assert (GET_MODE (dest) == QImode);
20620 ret = ix86_expand_compare (code, op0, op1);
20621 PUT_MODE (ret, QImode);
20622 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20625 /* Expand comparison setting or clearing carry flag. Return true when
20626 successful and set pop for the operation. */
/* NOTE(review): extraction dropped interior lines (several if/else frames
   and the integer-code switch).  Visible behavior: try to express the
   comparison as LTU/GEU on the carry flag, rewriting codes and constants
   (EQ -> unsigned-<1, GT/GTU -> adjust by one, LT/LE vs 0/-1 -> sign-bit
   bias) so downstream sbb/adc idioms apply.  */
20628 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20630 machine_mode mode =
20631 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20633 /* Do not handle double-mode compares that go through special path. */
20634 if (mode == (TARGET_64BIT ? TImode : DImode))
20637 if (SCALAR_FLOAT_MODE_P (mode))
20640 rtx_insn *compare_seq;
20642 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20644 /* Shortcut: following common codes never translate
20645 into carry flag compares. */
20646 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20647 || code == ORDERED || code == UNORDERED)
20650 /* These comparisons require zero flag; swap operands so they won't. */
20651 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20652 && !TARGET_IEEE_FP)
20654 std::swap (op0, op1);
20655 code = swap_condition (code);
20658 /* Try to expand the comparison and verify that we end up with
20659 carry flag based comparison. This fails to be true only when
20660 we decide to expand comparison using arithmetic that is not
20661 too common scenario. */
20663 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20664 compare_seq = get_insns ();
20667 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20668 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20669 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20671 code = GET_CODE (compare_op);
/* Only carry-flag conditions are acceptable; otherwise report failure.  */
20673 if (code != LTU && code != GEU)
20676 emit_insn (compare_seq);
20681 if (!INTEGRAL_MODE_P (mode))
20690 /* Convert a==0 into (unsigned)a<1. */
20693 if (op1 != const0_rtx)
20696 code = (code == EQ ? LTU : GEU);
20699 /* Convert a>b into b<a or a>=b-1. */
20702 if (CONST_INT_P (op1))
20704 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20705 /* Bail out on overflow. We still can swap operands but that
20706 would force loading of the constant into register. */
20707 if (op1 == const0_rtx
20708 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20710 code = (code == GTU ? GEU : LTU);
20714 std::swap (op0, op1);
20715 code = (code == GTU ? LTU : GEU);
20719 /* Convert a>=0 into (unsigned)a<0x80000000. */
20722 if (mode == DImode || op1 != const0_rtx)
20724 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20725 code = (code == LT ? GEU : LTU);
20729 if (mode == DImode || op1 != constm1_rtx)
20731 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20732 code = (code == LE ? GEU : LTU);
20738 /* Swapping operands may cause constant to appear as first operand. */
20739 if (!nonimmediate_operand (op0, VOIDmode))
20741 if (!can_create_pseudo_p ())
20743 op0 = force_reg (mode, op0);
20745 *pop = ix86_expand_compare (code, op0, op1);
20746 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
/* Expand an integer conditional move (operands[0] = operands[1] ?
   operands[2] : operands[3]).  Tries, in order: carry-flag / sign-bit
   tricks (sbb-based -1/0 masks), lea-based arithmetic when both arms are
   constants, setcc+and/plus sequences, constant-masking with a recursive
   call, and finally a real cmov.  Returns nonzero on success (exact return
   statements elided by the extraction).
   NOTE(review): many interior lines (braces, else-arms, some assignments)
   were dropped — treat the flow below as indicative, not complete.  */
20751 ix86_expand_int_movcc (rtx operands[])
20753 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20754 rtx_insn *compare_seq;
20756 machine_mode mode = GET_MODE (operands[0]);
20757 bool sign_bit_compare_p = false;
20758 rtx op0 = XEXP (operands[1], 0);
20759 rtx op1 = XEXP (operands[1], 1);
20761 if (GET_MODE (op0) == TImode
20762 || (GET_MODE (op0) == DImode
20767 compare_op = ix86_expand_compare (code, op0, op1);
20768 compare_seq = get_insns ();
20771 compare_code = GET_CODE (compare_op);
/* x >= 0 / x < 0 (and the -1 variants) can be done with a sign-bit shift
   instead of a compare.  */
20773 if ((op1 == const0_rtx && (code == GE || code == LT))
20774 || (op1 == constm1_rtx && (code == GT || code == LE)))
20775 sign_bit_compare_p = true;
20777 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20778 HImode insns, we'd be swallowed in word prefix ops. */
20780 if ((mode != HImode || TARGET_FAST_PREFIX)
20781 && (mode != (TARGET_64BIT ? TImode : DImode))
20782 && CONST_INT_P (operands[2])
20783 && CONST_INT_P (operands[3]))
20785 rtx out = operands[0];
20786 HOST_WIDE_INT ct = INTVAL (operands[2]);
20787 HOST_WIDE_INT cf = INTVAL (operands[3]);
20788 HOST_WIDE_INT diff;
20791 /* Sign bit compares are better done using shifts than we do by using
20793 if (sign_bit_compare_p
20794 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20796 /* Detect overlap between destination and compare sources. */
20799 if (!sign_bit_compare_p)
20802 bool fpcmp = false;
20804 compare_code = GET_CODE (compare_op);
20806 flags = XEXP (compare_op, 0);
20808 if (GET_MODE (flags) == CCFPmode
20809 || GET_MODE (flags) == CCFPUmode)
20813 = ix86_fp_compare_code_to_integer (compare_code);
20816 /* To simplify rest of code, restrict to the GEU case. */
20817 if (compare_code == LTU)
20819 std::swap (ct, cf);
20820 compare_code = reverse_condition (compare_code);
20821 code = reverse_condition (code);
20826 PUT_CODE (compare_op,
20827 reverse_condition_maybe_unordered
20828 (GET_CODE (compare_op)));
20830 PUT_CODE (compare_op,
20831 reverse_condition (GET_CODE (compare_op)));
20835 if (reg_overlap_mentioned_p (out, op0)
20836 || reg_overlap_mentioned_p (out, op1)
20837 tmp = gen_reg_rtx (mode);
/* Materialize carry as an all-ones/zero mask via sbb (movcc_0_m1).  */
20839 if (mode == DImode)
20840 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20842 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20843 flags, compare_op));
20847 if (code == GT || code == GE)
20848 code = reverse_condition (code);
20851 std::swap (ct, cf);
20854 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
/* Combine the -1/0 mask with ct/cf via add / or / not+add / and+add,
   depending on the constants (several arms elided).  */
20867 tmp = expand_simple_binop (mode, PLUS,
20869 copy_rtx (tmp), 1, OPTAB_DIRECT);
20880 tmp = expand_simple_binop (mode, IOR,
20882 copy_rtx (tmp), 1, OPTAB_DIRECT);
20884 else if (diff == -1 && ct)
20894 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20896 tmp = expand_simple_binop (mode, PLUS,
20897 copy_rtx (tmp), GEN_INT (cf),
20898 copy_rtx (tmp), 1, OPTAB_DIRECT);
20906 * andl cf - ct, dest
20916 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20919 tmp = expand_simple_binop (mode, AND,
20921 gen_int_mode (cf - ct, mode),
20922 copy_rtx (tmp), 1, OPTAB_DIRECT);
20924 tmp = expand_simple_binop (mode, PLUS,
20925 copy_rtx (tmp), GEN_INT (ct),
20926 copy_rtx (tmp), 1, OPTAB_DIRECT);
20929 if (!rtx_equal_p (tmp, out))
20930 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
/* Next strategy: try the reversed condition so diff/ct/cf line up.  */
20937 machine_mode cmp_mode = GET_MODE (op0);
20938 enum rtx_code new_code;
20940 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20942 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20944 /* We may be reversing unordered compare to normal compare, that
20945 is not valid in general (we may convert non-trapping condition
20946 to trapping one), however on i386 we currently emit all
20947 comparisons unordered. */
20948 new_code = reverse_condition_maybe_unordered (code);
20951 new_code = ix86_reverse_condition (code, cmp_mode);
20952 if (new_code != UNKNOWN)
20954 std::swap (ct, cf);
20960 compare_code = UNKNOWN;
20961 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20962 && CONST_INT_P (op1))
20964 if (op1 == const0_rtx
20965 && (code == LT || code == GE))
20966 compare_code = code;
20967 else if (op1 == constm1_rtx)
20971 else if (code == GT)
20976 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20977 if (compare_code != UNKNOWN
20978 && GET_MODE (op0) == GET_MODE (out)
20979 && (cf == -1 || ct == -1))
20981 /* If lea code below could be used, only optimize
20982 if it results in a 2 insn sequence. */
20984 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20985 || diff == 3 || diff == 5 || diff == 9)
20986 || (compare_code == LT && ct == -1)
20987 || (compare_code == GE && cf == -1))
20990 * notl op1 (if necessary)
20998 code = reverse_condition (code);
21001 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21003 out = expand_simple_binop (mode, IOR,
21005 out, 1, OPTAB_DIRECT);
21006 if (out != operands[0])
21007 emit_move_insn (operands[0], out);
/* lea-based strategy: diff in {1,2,3,4,5,8,9} maps onto lea scales.  */
21014 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
21015 || diff == 3 || diff == 5 || diff == 9)
21016 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
21018 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
21024 * lea cf(dest*(ct-cf)),dest
21028 * This also catches the degenerate setcc-only case.
21034 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21037 /* On x86_64 the lea instruction operates on Pmode, so we need
21038 to get arithmetics done in proper mode to match. */
21040 tmp = copy_rtx (out);
21044 out1 = copy_rtx (out);
21045 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
21049 tmp = gen_rtx_PLUS (mode, tmp, out1);
21055 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
21058 if (!rtx_equal_p (tmp, out))
21061 out = force_operand (tmp, copy_rtx (out));
21063 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
21065 if (!rtx_equal_p (out, operands[0]))
21066 emit_move_insn (operands[0], copy_rtx (out));
21072 * General case: Jumpful:
21073 * xorl dest,dest cmpl op1, op2
21074 * cmpl op1, op2 movl ct, dest
21075 * setcc dest jcc 1f
21076 * decl dest movl cf, dest
21077 * andl (cf-ct),dest 1:
21080 * Size 20. Size 14.
21082 * This is reasonably steep, but branch mispredict costs are
21083 * high on modern cpus, so consider failing only if optimizing
21087 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21088 && BRANCH_COST (optimize_insn_for_speed_p (),
21093 machine_mode cmp_mode = GET_MODE (op0);
21094 enum rtx_code new_code;
21096 if (SCALAR_FLOAT_MODE_P (cmp_mode))
21098 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
21100 /* We may be reversing unordered compare to normal compare,
21101 that is not valid in general (we may convert non-trapping
21102 condition to trapping one), however on i386 we currently
21103 emit all comparisons unordered. */
21104 new_code = reverse_condition_maybe_unordered (code);
21108 new_code = ix86_reverse_condition (code, cmp_mode);
21109 if (compare_code != UNKNOWN && new_code != UNKNOWN)
21110 compare_code = reverse_condition (compare_code);
21113 if (new_code != UNKNOWN)
21121 if (compare_code != UNKNOWN)
21123 /* notl op1 (if needed)
21128 For x < 0 (resp. x <= -1) there will be no notl,
21129 so if possible swap the constants to get rid of the
21131 True/false will be -1/0 while code below (store flag
21132 followed by decrement) is 0/-1, so the constants need
21133 to be exchanged once more. */
21135 if (compare_code == GE || !cf)
21137 code = reverse_condition (code);
21141 std::swap (ct, cf);
21143 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21147 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21149 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
21151 copy_rtx (out), 1, OPTAB_DIRECT);
21154 out = expand_simple_binop (mode, AND, copy_rtx (out),
21155 gen_int_mode (cf - ct, mode),
21156 copy_rtx (out), 1, OPTAB_DIRECT);
21158 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
21159 copy_rtx (out), 1, OPTAB_DIRECT);
21160 if (!rtx_equal_p (out, operands[0]))
21161 emit_move_insn (operands[0], copy_rtx (out));
21167 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21169 /* Try a few things more with specific constants and a variable. */
21172 rtx var, orig_out, out, tmp;
21174 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
21177 /* If one of the two operands is an interesting constant, load a
21178 constant with the above and mask it in with a logical operation. */
21180 if (CONST_INT_P (operands[2]))
21183 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
21184 operands[3] = constm1_rtx, op = and_optab;
21185 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
21186 operands[3] = const0_rtx, op = ior_optab;
21190 else if (CONST_INT_P (operands[3]))
21193 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
21194 operands[2] = constm1_rtx, op = and_optab;
21195 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
21196 operands[2] = const0_rtx, op = ior_optab;
21203 orig_out = operands[0];
21204 tmp = gen_reg_rtx (mode);
21207 /* Recurse to get the constant loaded. */
21208 if (ix86_expand_int_movcc (operands) == 0)
21211 /* Mask in the interesting variable. */
21212 out = expand_binop (mode, op, var, tmp, orig_out, 0,
21214 if (!rtx_equal_p (out, orig_out))
21215 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
21221 * For comparison with above,
/* Final strategy: a real cmov; force operands into registers as the
   cmov patterns require.  */
21231 if (! nonimmediate_operand (operands[2], mode))
21232 operands[2] = force_reg (mode, operands[2]);
21233 if (! nonimmediate_operand (operands[3], mode))
21234 operands[3] = force_reg (mode, operands[3]);
21236 if (! register_operand (operands[2], VOIDmode)
21238 || ! register_operand (operands[3], VOIDmode)))
21239 operands[2] = force_reg (mode, operands[2]);
21242 && ! register_operand (operands[3], VOIDmode))
21243 operands[3] = force_reg (mode, operands[3]);
21245 emit_insn (compare_seq);
21246 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21247 gen_rtx_IF_THEN_ELSE (mode,
21248 compare_op, operands[2],
21253 /* Swap, force into registers, or otherwise massage the two operands
21254 to an sse comparison with a mask result. Thus we differ a bit from
21255 ix86_prepare_fp_compare_args which expects to produce a flags result.
21257 The DEST operand exists to help determine whether to commute commutative
21258 operators. The POP0/POP1 operands are updated in place. The new
21259 comparison code is returned, or UNKNOWN if not implementable. */
21261 static enum rtx_code
21262 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
21263 rtx *pop0, rtx *pop1)
/* NOTE(review): the switch framing and case labels for CODE are elided;
   comments below indicate which group each fragment belongs to.  */
21269 /* AVX supports all the needed comparisons. */
21272 /* We have no LTGT as an operator. We could implement it with
21273 NE & ORDERED, but this requires an extra temporary. It's
21274 not clear that it's worth it. */
21281 /* These are supported directly. */
21288 /* AVX has 3 operand comparisons, no need to swap anything. */
21291 /* For commutative operators, try to canonicalize the destination
21292 operand to be first in the comparison - this helps reload to
21293 avoid extra moves. */
21294 if (!dest || !rtx_equal_p (dest, *pop1))
21302 /* These are not supported directly before AVX, and furthermore
21303 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
21304 comparison operands to transform into something that is
21306 std::swap (*pop0, *pop1);
21307 code = swap_condition (code);
21311 gcc_unreachable ();
21317 /* Detect conditional moves that exactly match min/max operational
21318 semantics. Note that this is IEEE safe, as long as we don't
21319 interchange the operands.
21321 Returns FALSE if this conditional move doesn't match a MIN/MAX,
21322 and TRUE if the operation is successful and instructions are emitted. */
21325 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
21326 rtx cmp_op1, rtx if_true, rtx if_false)
/* Only LT (visible arm elided) and UNGE shapes are recognized; UNGE is
   normalized to the LT shape by swapping the arms.  */
21334 else if (code == UNGE)
21335 std::swap (if_true, if_false);
/* is_min when arms match operands directly; is_max when crossed.  */
21339 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
21341 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
21346 mode = GET_MODE (dest);
21348 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
21349 but MODE may be a vector mode and thus not appropriate. */
21350 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
/* IEEE-honoring path: wrap in an UNSPEC so operand order is preserved.  */
21352 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
21355 if_true = force_reg (mode, if_true);
21356 v = gen_rtvec (2, if_true, if_false);
21357 tmp = gen_rtx_UNSPEC (mode, v, u);
/* Fast-math path: a plain SMIN/SMAX is fine.  */
21361 code = is_min ? SMIN : SMAX;
21362 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
21365 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
21369 /* Expand an sse vector comparison. Return the register with the result. */
21372 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
21373 rtx op_true, rtx op_false)
21375 machine_mode mode = GET_MODE (dest);
21376 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
21378 /* In general case result of comparison can differ from operands' type. */
21379 machine_mode cmp_mode;
21381 /* In AVX512F the result of comparison is an integer mask. */
21382 bool maskcmp = false;
/* 64-byte operand vectors (AVX-512) produce a k-register style integer
   mask whose width equals the element count.  */
21385 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
21387 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
21388 gcc_assert (cmp_mode != BLKmode);
21393 cmp_mode = cmp_ops_mode;
21396 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
21397 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
21398 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
/* Use a fresh pseudo if DEST overlaps the select arms (or the mode
   mismatches — leading condition partially elided).  */
21401 || reg_overlap_mentioned_p (dest, op_true)
21402 || reg_overlap_mentioned_p (dest, op_false))
21403 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
21405 /* Compare patterns for int modes are unspec in AVX512F only. */
21406 if (maskcmp && (code == GT || code == EQ))
21408 rtx (*gen)(rtx, rtx, rtx);
21410 switch (cmp_ops_mode)
21413 gcc_assert (TARGET_AVX512BW);
21414 gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
21417 gcc_assert (TARGET_AVX512BW);
21418 gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
21421 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
21424 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
21432 emit_insn (gen (dest, cmp_op0, cmp_op1));
/* Generic path: emit the comparison as an rtx of CMP_MODE.  */
21436 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
21438 if (cmp_mode != mode && !maskcmp)
21440 x = force_reg (cmp_ops_mode, x);
21441 convert_move (dest, x, false);
21444 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21449 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
21450 operations. This is used for both scalar and vector conditional moves. */
21453 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
21455 machine_mode mode = GET_MODE (dest);
21456 machine_mode cmpmode = GET_MODE (cmp);
21458 /* In AVX512F the result of comparison is an integer mask. */
21459 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
/* Special cases first: mask alone, AND, ANDNOT, IOR forms (guard
   conditions partially elided by the extraction).  */
21463 if (vector_all_ones_operand (op_true, mode)
21464 && rtx_equal_p (op_false, CONST0_RTX (mode))
21467 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
21469 else if (op_false == CONST0_RTX (mode)
21472 op_true = force_reg (mode, op_true);
21473 x = gen_rtx_AND (mode, cmp, op_true);
21474 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21476 else if (op_true == CONST0_RTX (mode)
21479 op_false = force_reg (mode, op_false);
21480 x = gen_rtx_NOT (mode, cmp);
21481 x = gen_rtx_AND (mode, x, op_false);
21482 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21484 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
21487 op_false = force_reg (mode, op_false);
21488 x = gen_rtx_IOR (mode, cmp, op_false);
21489 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* XOP has a native vector cmov (vpcmov) expressed as IF_THEN_ELSE.  */
21491 else if (TARGET_XOP
21494 op_true = force_reg (mode, op_true);
21496 if (!nonimmediate_operand (op_false, mode))
21497 op_false = force_reg (mode, op_false);
21499 emit_insn (gen_rtx_SET (mode, dest,
21500 gen_rtx_IF_THEN_ELSE (mode, cmp,
/* Blend path: pick a blendv/blendm generator per mode (SSE4.1, AVX,
   AVX2, AVX-512); byte-mode fallbacks retype operands to V16QI/V32QI.  */
21506 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21509 if (!nonimmediate_operand (op_true, mode))
21510 op_true = force_reg (mode, op_true);
21512 op_false = force_reg (mode, op_false);
21518 gen = gen_sse4_1_blendvps;
21522 gen = gen_sse4_1_blendvpd;
21530 gen = gen_sse4_1_pblendvb;
21531 if (mode != V16QImode)
21532 d = gen_reg_rtx (V16QImode);
21533 op_false = gen_lowpart (V16QImode, op_false);
21534 op_true = gen_lowpart (V16QImode, op_true);
21535 cmp = gen_lowpart (V16QImode, cmp);
21540 gen = gen_avx_blendvps256;
21544 gen = gen_avx_blendvpd256;
21552 gen = gen_avx2_pblendvb;
21553 if (mode != V32QImode)
21554 d = gen_reg_rtx (V32QImode);
21555 op_false = gen_lowpart (V32QImode, op_false);
21556 op_true = gen_lowpart (V32QImode, op_true);
21557 cmp = gen_lowpart (V32QImode, cmp);
21562 gen = gen_avx512bw_blendmv64qi;
21565 gen = gen_avx512bw_blendmv32hi;
21568 gen = gen_avx512f_blendmv16si;
21571 gen = gen_avx512f_blendmv8di;
21574 gen = gen_avx512f_blendmv8df;
21577 gen = gen_avx512f_blendmv16sf;
21586 emit_insn (gen (d, op_false, op_true, cmp));
21588 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
/* Last resort: synthesize the select as (cmp & true) | (~cmp & false).  */
21592 op_true = force_reg (mode, op_true);
21594 t2 = gen_reg_rtx (mode);
21596 t3 = gen_reg_rtx (mode);
21600 x = gen_rtx_AND (mode, op_true, cmp);
21601 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21603 x = gen_rtx_NOT (mode, cmp);
21604 x = gen_rtx_AND (mode, x, op_false);
21605 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21607 x = gen_rtx_IOR (mode, t3, t2);
21608 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21613 /* Expand a floating-point conditional move. Return true if successful. */
21616 ix86_expand_fp_movcc (rtx operands[])
21618 machine_mode mode = GET_MODE (operands[0]);
21619 enum rtx_code code = GET_CODE (operands[1]);
21620 rtx tmp, compare_op;
21621 rtx op0 = XEXP (operands[1], 0);
21622 rtx op1 = XEXP (operands[1], 1);
/* SSE math path: lower to a mask compare + logical select, since SSE has
   no flags-based cmove for XMM registers.  */
21624 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21626 machine_mode cmode;
21628 /* Since we've no cmove for sse registers, don't force bad register
21629 allocation just to gain access to it. Deny movcc when the
21630 comparison mode doesn't match the move mode. */
21631 cmode = GET_MODE (op0);
21632 if (cmode == VOIDmode)
21633 cmode = GET_MODE (op1);
21637 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21638 if (code == UNKNOWN)
/* Try the min/max shortcut before a full compare+select.  */
21641 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21642 operands[2], operands[3]))
21645 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21646 operands[2], operands[3]);
21647 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
/* x87 fcmov path below (bail-out consequent elided after this test).  */
21651 if (GET_MODE (op0) == TImode
21652 || (GET_MODE (op0) == DImode
21656 /* The floating point conditional move instructions don't directly
21657 support conditions resulting from a signed integer comparison. */
21659 compare_op = ix86_expand_compare (code, op0, op1);
21660 if (!fcmov_comparison_operator (compare_op, VOIDmode))
/* Unsupported condition: materialize it as a byte, then test != 0.  */
21662 tmp = gen_reg_rtx (QImode);
21663 ix86_expand_setcc (tmp, code, op0, op1);
21665 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21668 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21669 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21670 operands[2], operands[3])));
21675 /* Expand a floating-point vector conditional move; a vcond operation
21676 rather than a movcc operation. */
21679 ix86_expand_fp_vcond (rtx operands[])
21681 enum rtx_code code = GET_CODE (operands[3]);
21684 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21685 &operands[4], &operands[5]);
21686 if (code == UNKNOWN)
/* LTGT/UNEQ need two compares combined with a logical op, since SSE has
   no direct predicate for them (case labels elided).  */
21689 switch (GET_CODE (operands[3]))
21692 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21693 operands[5], operands[0], operands[0]);
21694 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21695 operands[5], operands[1], operands[2]);
21699 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21700 operands[5], operands[0], operands[0]);
21701 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21702 operands[5], operands[1], operands[2]);
21706 gcc_unreachable ();
21708 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21710 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
/* Ordinary codes: min/max shortcut first, else compare + select.  */
21714 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21715 operands[5], operands[1], operands[2]))
21718 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21719 operands[1], operands[2]);
21720 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21724 /* Expand a signed/unsigned integral vector conditional move. */
21727 ix86_expand_int_vcond (rtx operands[])
21729 machine_mode data_mode = GET_MODE (operands[0]);
21730 machine_mode mode = GET_MODE (operands[4]);
21731 enum rtx_code code = GET_CODE (operands[3]);
21732 bool negate = false;
21735 cop0 = operands[4];
21736 cop1 = operands[5];
21738 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21739 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21740 if ((code == LT || code == GE)
21741 && data_mode == mode
21742 && cop1 == CONST0_RTX (mode)
21743 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21744 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21745 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21746 && (GET_MODE_SIZE (data_mode) == 16
21747 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21749 rtx negop = operands[2 - (code == LT)];
21750 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21751 if (negop == CONST1_RTX (data_mode))
/* x<0 ? 1 : 0  ==  logical shift of the sign bit to bit 0.  */
21753 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21754 operands[0], 1, OPTAB_DIRECT);
21755 if (res != operands[0])
21756 emit_move_insn (operands[0], res);
21759 else if (GET_MODE_INNER (data_mode) != DImode
21760 && vector_all_ones_operand (negop, data_mode))
/* x<0 ? -1 : 0  ==  arithmetic shift smearing the sign bit.  */
21762 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21763 operands[0], 0, OPTAB_DIRECT);
21764 if (res != operands[0])
21765 emit_move_insn (operands[0], res);
21770 if (!nonimmediate_operand (cop1, mode))
21771 cop1 = force_reg (mode, cop1);
21772 if (!general_operand (operands[1], data_mode))
21773 operands[1] = force_reg (data_mode, operands[1]);
21774 if (!general_operand (operands[2], data_mode))
21775 operands[2] = force_reg (data_mode, operands[2]);
21777 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21779 && (mode == V16QImode || mode == V8HImode
21780 || mode == V4SImode || mode == V2DImode)
21784 /* Canonicalize the comparison to EQ, GT, GTU. */
/* Reversal sets `negate` (arms swapped later); swap handles the
   commuted codes (surrounding case labels elided).  */
21795 code = reverse_condition (code);
21801 code = reverse_condition (code);
21807 std::swap (cop0, cop1);
21808 code = swap_condition (code);
21812 gcc_unreachable ();
21815 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21816 if (mode == V2DImode)
21821 /* SSE4.1 supports EQ. */
21822 if (!TARGET_SSE4_1)
21828 /* SSE4.2 supports GT/GTU. */
21829 if (!TARGET_SSE4_2)
21834 gcc_unreachable ();
21838 /* Unsigned parallel compare is not supported by the hardware.
21839 Play some tricks to turn this into a signed comparison
21843 cop0 = force_reg (mode, cop0);
21855 rtx (*gen_sub3) (rtx, rtx, rtx);
21859 case V16SImode: gen_sub3 = gen_subv16si3; break;
21860 case V8DImode: gen_sub3 = gen_subv8di3; break;
21861 case V8SImode: gen_sub3 = gen_subv8si3; break;
21862 case V4DImode: gen_sub3 = gen_subv4di3; break;
21863 case V4SImode: gen_sub3 = gen_subv4si3; break;
21864 case V2DImode: gen_sub3 = gen_subv2di3; break;
21866 gcc_unreachable ();
21868 /* Subtract (-(INT MAX) - 1) from both operands to make
21870 mask = ix86_build_signbit_mask (mode, true, false);
21871 t1 = gen_reg_rtx (mode);
21872 emit_insn (gen_sub3 (t1, cop0, mask));
21874 t2 = gen_reg_rtx (mode);
21875 emit_insn (gen_sub3 (t2, cop1, mask));
21889 /* Perform a parallel unsigned saturating subtraction. */
21890 x = gen_reg_rtx (mode);
21891 emit_insn (gen_rtx_SET (VOIDmode, x,
21892 gen_rtx_US_MINUS (mode, cop0, cop1)));
21895 cop1 = CONST0_RTX (mode);
21901 gcc_unreachable ();
21906 /* Allow the comparison to be done in one mode, but the movcc to
21907 happen in another mode. */
21908 if (data_mode == mode)
/* `negate` swaps the select arms via the 1+negate / 2-negate indexing.  */
21910 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21911 operands[1+negate], operands[2-negate]);
21915 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21916 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21917 operands[1+negate], operands[2-negate]);
21918 if (GET_MODE (x) == mode)
21919 x = gen_lowpart (data_mode, x);
21922 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21923 operands[2-negate]);
21927 /* AVX512F does support 64-byte integer vector operations,
21928 thus the longest vector we are faced with is V64QImode. */
21929 #define MAX_VECT_LEN 64
/* Descriptor for a vector permutation to be expanded:
   TARGET <- permutation of OP0 (and OP1 for two-operand shuffles),
   selected by the NELT element indices in PERM.  VMODE is the vector
   mode; ONE_OPERAND_P is true when OP0 == OP1 (single-input shuffle).  */
21931 struct expand_vec_perm_d
21933 rtx target, op0, op1;
21934 unsigned char perm[MAX_VECT_LEN];
21935 machine_mode vmode;
21936 unsigned char nelt;
21937 bool one_operand_p;
/* Try to expand a variable two-source permutation with a single AVX-512
   VPERMI2* instruction.  Picks the gen_* pattern matching the vector
   mode and the available ISA (AVX512F/BW/VL/VBMI); MASKMODE is the
   integer vector mode the permutation-index operand must have (differs
   from MODE for float vectors).  */
21942 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
21943 struct expand_vec_perm_d *d)
21945 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
21946 expander, so args are either in d, or in op0, op1 etc. */
21947 machine_mode mode = GET_MODE (d ? d->op0 : op0);
21948 machine_mode maskmode = mode;
21949 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
/* Select the insn generator by mode; GEN stays NULL (=> fail) when the
   required ISA extension is not enabled.  */
21954 if (TARGET_AVX512VL && TARGET_AVX512BW)
21955 gen = gen_avx512vl_vpermi2varv8hi3;
21958 if (TARGET_AVX512VL && TARGET_AVX512BW)
21959 gen = gen_avx512vl_vpermi2varv16hi3;
21962 if (TARGET_AVX512VBMI)
21963 gen = gen_avx512bw_vpermi2varv64qi3;
21966 if (TARGET_AVX512BW)
21967 gen = gen_avx512bw_vpermi2varv32hi3;
21970 if (TARGET_AVX512VL)
21971 gen = gen_avx512vl_vpermi2varv4si3;
21974 if (TARGET_AVX512VL)
21975 gen = gen_avx512vl_vpermi2varv8si3;
21978 if (TARGET_AVX512F)
21979 gen = gen_avx512f_vpermi2varv16si3;
21982 if (TARGET_AVX512VL)
21984 gen = gen_avx512vl_vpermi2varv4sf3;
21985 maskmode = V4SImode;
21989 if (TARGET_AVX512VL)
21991 gen = gen_avx512vl_vpermi2varv8sf3;
21992 maskmode = V8SImode;
21996 if (TARGET_AVX512F)
21998 gen = gen_avx512f_vpermi2varv16sf3;
21999 maskmode = V16SImode;
22003 if (TARGET_AVX512VL)
22004 gen = gen_avx512vl_vpermi2varv2di3;
22007 if (TARGET_AVX512VL)
22008 gen = gen_avx512vl_vpermi2varv4di3;
22011 if (TARGET_AVX512F)
22012 gen = gen_avx512f_vpermi2varv8di3;
22015 if (TARGET_AVX512VL)
22017 gen = gen_avx512vl_vpermi2varv2df3;
22018 maskmode = V2DImode;
22022 if (TARGET_AVX512VL)
22024 gen = gen_avx512vl_vpermi2varv4df3;
22025 maskmode = V4DImode;
22029 if (TARGET_AVX512F)
22031 gen = gen_avx512f_vpermi2varv8df3;
22032 maskmode = V8DImode;
22042 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
22043 expander, so args are either in d, or in op0, op1 etc. */
22047 target = d->target;
/* Const expander path: materialize the constant index vector from
   d->perm.  */
22050 for (int i = 0; i < d->nelt; ++i)
22051 vec[i] = GEN_INT (d->perm[i]);
22052 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
/* VPERMI2 needs the index operand in a register.  */
22055 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
22059 /* Expand a variable vector permutation. */
/* operands[0] = target, operands[1]/operands[2] = input vectors,
   operands[3] = variable (non-constant) element-index mask.
   Strategies tried, in order: AVX-512 VPERMI2*, AVX2 VPERMD/VPERMPS
   (with mask massaging for V4DI/V4DF/V16HI/V32QI), then the 128-bit
   XOP VPPERM or SSSE3 PSHUFB paths.  */
22062 ix86_expand_vec_perm (rtx operands[])
22064 rtx target = operands[0];
22065 rtx op0 = operands[1];
22066 rtx op1 = operands[2];
22067 rtx mask = operands[3];
22068 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
22069 machine_mode mode = GET_MODE (op0);
22070 machine_mode maskmode = GET_MODE (mask);
22072 bool one_operand_shuffle = rtx_equal_p (op0, op1);
22074 /* Number of elements in the vector. */
22075 w = GET_MODE_NUNITS (mode);
22076 e = GET_MODE_UNIT_SIZE (mode);
22077 gcc_assert (w <= 64);
/* One-instruction AVX-512 path, if available for this mode/ISA.  */
22079 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
22084 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
22086 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
22087 an constant shuffle operand. With a tiny bit of effort we can
22088 use VPERMD instead. A re-interpretation stall for V4DFmode is
22089 unfortunate but there's no avoiding it.
22090 Similarly for V16HImode we don't have instructions for variable
22091 shuffling, while for V32QImode we can use after preparing suitable
22092 masks vpshufb; vpshufb; vpermq; vpor. */
22094 if (mode == V16HImode)
22096 maskmode = mode = V32QImode;
22102 maskmode = mode = V8SImode;
22106 t1 = gen_reg_rtx (maskmode)
22108 /* Replicate the low bits of the V4DImode mask into V8SImode:
22110 t1 = { A A B B C C D D }. */
22111 for (i = 0; i < w / 2; ++i)
22112 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
22113 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22114 vt = force_reg (maskmode, vt);
22115 mask = gen_lowpart (maskmode, mask);
22116 if (maskmode == V8SImode)
22117 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
22119 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
22121 /* Multiply the shuffle indicies by two. */
22122 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
22125 /* Add one to the odd shuffle indicies:
22126 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
22127 for (i = 0; i < w / 2; ++i)
22129 vec[i * 2] = const0_rtx;
22130 vec[i * 2 + 1] = const1_rtx;
22132 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22133 vt = validize_mem (force_const_mem (maskmode, vt));
22134 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
22137 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
22138 operands[3] = mask = t1;
22139 target = gen_reg_rtx (mode);
22140 op0 = gen_lowpart (mode, op0);
22141 op1 = gen_lowpart (mode, op1);
22147 /* The VPERMD and VPERMPS instructions already properly ignore
22148 the high bits of the shuffle elements. No need for us to
22149 perform an AND ourselves. */
22150 if (one_operand_shuffle)
22152 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
22153 if (target != operands[0])
22154 emit_move_insn (operands[0],
22155 gen_lowpart (GET_MODE (operands[0]), target));
/* Two-operand V8SI: permute each source, then merge.  */
22159 t1 = gen_reg_rtx (V8SImode);
22160 t2 = gen_reg_rtx (V8SImode);
22161 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
22162 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
22168 mask = gen_lowpart (V8SImode, mask);
22169 if (one_operand_shuffle)
22170 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
22173 t1 = gen_reg_rtx (V8SFmode);
22174 t2 = gen_reg_rtx (V8SFmode);
22175 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
22176 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
22182 /* By combining the two 128-bit input vectors into one 256-bit
22183 input vector, we can use VPERMD and VPERMPS for the full
22184 two-operand shuffle. */
22185 t1 = gen_reg_rtx (V8SImode);
22186 t2 = gen_reg_rtx (V8SImode);
22187 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
22188 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22189 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
22190 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
22194 t1 = gen_reg_rtx (V8SFmode);
22195 t2 = gen_reg_rtx (V8SImode);
22196 mask = gen_lowpart (V4SImode, mask);
22197 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
22198 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22199 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
22200 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
/* V32QI: PSHUFB only shuffles within 128-bit lanes, so build two
   adjusted masks and combine intra-lane and cross-lane results.  */
22204 t1 = gen_reg_rtx (V32QImode);
22205 t2 = gen_reg_rtx (V32QImode);
22206 t3 = gen_reg_rtx (V32QImode);
22207 vt2 = GEN_INT (-128);
22208 for (i = 0; i < 32; i++)
22210 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22211 vt = force_reg (V32QImode, vt);
22212 for (i = 0; i < 32; i++)
22213 vec[i] = i < 16 ? vt2 : const0_rtx;
22214 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22215 vt2 = force_reg (V32QImode, vt2);
22216 /* From mask create two adjusted masks, which contain the same
22217 bits as mask in the low 7 bits of each vector element.
22218 The first mask will have the most significant bit clear
22219 if it requests element from the same 128-bit lane
22220 and MSB set if it requests element from the other 128-bit lane.
22221 The second mask will have the opposite values of the MSB,
22222 and additionally will have its 128-bit lanes swapped.
22223 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
22224 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
22225 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
22226 stands for other 12 bytes. */
22227 /* The bit whether element is from the same lane or the other
22228 lane is bit 4, so shift it up by 3 to the MSB position. */
22229 t5 = gen_reg_rtx (V4DImode);
22230 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
22232 /* Clear MSB bits from the mask just in case it had them set. */
22233 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
22234 /* After this t1 will have MSB set for elements from other lane. */
22235 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
22236 /* Clear bits other than MSB. */
22237 emit_insn (gen_andv32qi3 (t1, t1, vt));
22238 /* Or in the lower bits from mask into t3. */
22239 emit_insn (gen_iorv32qi3 (t3, t1, t2));
22240 /* And invert MSB bits in t1, so MSB is set for elements from the same
22242 emit_insn (gen_xorv32qi3 (t1, t1, vt));
22243 /* Swap 128-bit lanes in t3. */
22244 t6 = gen_reg_rtx (V4DImode);
22245 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
22246 const2_rtx, GEN_INT (3),
22247 const0_rtx, const1_rtx));
22248 /* And or in the lower bits from mask into t1. */
22249 emit_insn (gen_iorv32qi3 (t1, t1, t2));
22250 if (one_operand_shuffle)
22252 /* Each of these shuffles will put 0s in places where
22253 element from the other 128-bit lane is needed, otherwise
22254 will shuffle in the requested value. */
22255 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
22256 gen_lowpart (V32QImode, t6)));
22257 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
22258 /* For t3 the 128-bit lanes are swapped again. */
22259 t7 = gen_reg_rtx (V4DImode);
22260 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
22261 const2_rtx, GEN_INT (3),
22262 const0_rtx, const1_rtx));
22263 /* And oring both together leads to the result. */
22264 emit_insn (gen_iorv32qi3 (target, t1,
22265 gen_lowpart (V32QImode, t7)));
22266 if (target != operands[0])
22267 emit_move_insn (operands[0],
22268 gen_lowpart (GET_MODE (operands[0]), target));
22272 t4 = gen_reg_rtx (V32QImode);
22273 /* Similarly to the above one_operand_shuffle code,
22274 just for repeated twice for each operand. merge_two:
22275 code will merge the two results together. */
22276 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
22277 gen_lowpart (V32QImode, t6)));
22278 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
22279 gen_lowpart (V32QImode, t6)));
22280 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
22281 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
22282 t7 = gen_reg_rtx (V4DImode);
22283 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
22284 const2_rtx, GEN_INT (3),
22285 const0_rtx, const1_rtx));
22286 t8 = gen_reg_rtx (V4DImode);
22287 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
22288 const2_rtx, GEN_INT (3),
22289 const0_rtx, const1_rtx));
22290 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
22291 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
/* From here on, only 128-bit (or narrower) vectors are handled.  */
22297 gcc_assert (GET_MODE_SIZE (mode) <= 16);
22304 /* The XOP VPPERM insn supports three inputs. By ignoring the
22305 one_operand_shuffle special case, we avoid creating another
22306 set of constant vectors in memory. */
22307 one_operand_shuffle = false;
22309 /* mask = mask & {2*w-1, ...} */
22310 vt = GEN_INT (2*w - 1);
22314 /* mask = mask & {w-1, ...} */
22315 vt = GEN_INT (w - 1);
22318 for (i = 0; i < w; i++)
22320 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22321 mask = expand_simple_binop (maskmode, AND, mask, vt,
22322 NULL_RTX, 0, OPTAB_DIRECT);
22324 /* For non-QImode operations, convert the word permutation control
22325 into a byte permutation control. */
22326 if (mode != V16QImode)
22328 mask = expand_simple_binop (maskmode, ASHIFT, mask,
22329 GEN_INT (exact_log2 (e)),
22330 NULL_RTX, 0, OPTAB_DIRECT);
22332 /* Convert mask to vector of chars. */
22333 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
22335 /* Replicate each of the input bytes into byte positions:
22336 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
22337 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
22338 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
22339 for (i = 0; i < 16; ++i)
22340 vec[i] = GEN_INT (i/e * e);
22341 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22342 vt = validize_mem (force_const_mem (V16QImode, vt));
22344 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
22346 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
22348 /* Convert it into the byte positions by doing
22349 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
22350 for (i = 0; i < 16; ++i)
22351 vec[i] = GEN_INT (i % e);
22352 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22353 vt = validize_mem (force_const_mem (V16QImode, vt));
22354 emit_insn (gen_addv16qi3 (mask, mask, vt));
22357 /* The actual shuffle operations all operate on V16QImode. */
22358 op0 = gen_lowpart (V16QImode, op0);
22359 op1 = gen_lowpart (V16QImode, op1);
22363 if (GET_MODE (target) != V16QImode)
22364 target = gen_reg_rtx (V16QImode);
22365 emit_insn (gen_xop_pperm (target, op0, op1, mask));
22366 if (target != operands[0])
22367 emit_move_insn (operands[0],
22368 gen_lowpart (GET_MODE (operands[0]), target));
22370 else if (one_operand_shuffle)
22372 if (GET_MODE (target) != V16QImode)
22373 target = gen_reg_rtx (V16QImode);
22374 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
22375 if (target != operands[0])
22376 emit_move_insn (operands[0],
22377 gen_lowpart (GET_MODE (operands[0]), target));
22384 /* Shuffle the two input vectors independently. */
22385 t1 = gen_reg_rtx (V16QImode);
22386 t2 = gen_reg_rtx (V16QImode);
22387 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
22388 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
22391 /* Then merge them together. The key is whether any given control
22392 element contained a bit set that indicates the second word. */
22393 mask = operands[3];
22395 if (maskmode == V2DImode && !TARGET_SSE4_1)
22397 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
22398 more shuffle to convert the V2DI input mask into a V4SI
22399 input mask. At which point the masking that expand_int_vcond
22400 will work as desired. */
22401 rtx t3 = gen_reg_rtx (V4SImode);
22402 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
22403 const0_rtx, const0_rtx,
22404 const2_rtx, const2_rtx));
22406 maskmode = V4SImode;
22410 for (i = 0; i < w; i++)
22412 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22413 vt = force_reg (maskmode, vt);
22414 mask = expand_simple_binop (maskmode, AND, mask, vt,
22415 NULL_RTX, 0, OPTAB_DIRECT);
22417 if (GET_MODE (target) != mode)
22418 target = gen_reg_rtx (mode);
/* Select t1/t2 elementwise via a vector conditional move.  */
22420 xops[1] = gen_lowpart (mode, t2);
22421 xops[2] = gen_lowpart (mode, t1);
22422 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
22425 ok = ix86_expand_int_vcond (xops);
22427 if (target != operands[0])
22428 emit_move_insn (operands[0],
22429 gen_lowpart (GET_MODE (operands[0]), target));
22433 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
22434 true if we should do zero extension, else sign extension. HIGH_P is
22435 true if we want the N/2 high elements, else the low elements. */
22438 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
22440 machine_mode imode = GET_MODE (src);
/* Path using pmovzx/pmovsx-style single-operand extension patterns.
   EXTRACT pulls the requested half out of a >=256-bit source first.  */
22445 rtx (*unpack)(rtx, rtx);
22446 rtx (*extract)(rtx, rtx) = NULL;
22447 machine_mode halfmode = BLKmode;
22453 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
22455 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
22456 halfmode = V32QImode;
22458 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
22462 unpack = gen_avx2_zero_extendv16qiv16hi2;
22464 unpack = gen_avx2_sign_extendv16qiv16hi2;
22465 halfmode = V16QImode;
22467 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
22471 unpack = gen_avx512f_zero_extendv16hiv16si2;
22473 unpack = gen_avx512f_sign_extendv16hiv16si2;
22474 halfmode = V16HImode;
22476 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
22480 unpack = gen_avx2_zero_extendv8hiv8si2;
22482 unpack = gen_avx2_sign_extendv8hiv8si2;
22483 halfmode = V8HImode;
22485 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
22489 unpack = gen_avx512f_zero_extendv8siv8di2;
22491 unpack = gen_avx512f_sign_extendv8siv8di2;
22492 halfmode = V8SImode;
22494 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
22498 unpack = gen_avx2_zero_extendv4siv4di2;
22500 unpack = gen_avx2_sign_extendv4siv4di2;
22501 halfmode = V4SImode;
22503 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
22507 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22509 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22513 unpack = gen_sse4_1_zero_extendv4hiv4si2;
22515 unpack = gen_sse4_1_sign_extendv4hiv4si2;
22519 unpack = gen_sse4_1_zero_extendv2siv2di2;
22521 unpack = gen_sse4_1_sign_extendv2siv2di2;
22524 gcc_unreachable ();
22527 if (GET_MODE_SIZE (imode) >= 32)
22529 tmp = gen_reg_rtx (halfmode);
22530 emit_insn (extract (tmp, src));
22534 /* Shift higher 8 bytes to lower 8 bytes. */
22535 tmp = gen_reg_rtx (V1TImode);
22536 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22538 tmp = gen_lowpart (imode, tmp);
22543 emit_insn (unpack (dest, tmp));
/* Fallback path: punpckh/punpckl interleave with an all-zero vector
   (unsigned) or a sign-mask vector (signed).  */
22547 rtx (*unpack)(rtx, rtx, rtx);
22553 unpack = gen_vec_interleave_highv16qi;
22555 unpack = gen_vec_interleave_lowv16qi;
22559 unpack = gen_vec_interleave_highv8hi;
22561 unpack = gen_vec_interleave_lowv8hi;
22565 unpack = gen_vec_interleave_highv4si;
22567 unpack = gen_vec_interleave_lowv4si;
22570 gcc_unreachable ();
22574 tmp = force_reg (imode, CONST0_RTX (imode));
/* Signed case: build the sign mask as (0 > src) so interleaving
   replicates each element's sign into the high half.  */
22576 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22577 src, pc_rtx, pc_rtx);
22579 rtx tmp2 = gen_reg_rtx (imode);
22580 emit_insn (unpack (tmp2, src, tmp));
22581 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
22585 /* Expand conditional increment or decrement using adc/sbb instructions.
22586 The default case using setcc followed by the conditional move can be
22587 done by generic code. */
22589 ix86_expand_int_addcc (rtx operands[])
22591 enum rtx_code code = GET_CODE (operands[1]);
22593 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22595 rtx val = const0_rtx;
22596 bool fpcmp = false;
22598 rtx op0 = XEXP (operands[1], 0);
22599 rtx op1 = XEXP (operands[1], 1);
/* Only +1/-1 adjustments can be done with a single adc/sbb.  */
22601 if (operands[3] != const1_rtx
22602 && operands[3] != constm1_rtx)
/* Turn the comparison into one that sets the carry flag.  */
22604 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22606 code = GET_CODE (compare_op);
22608 flags = XEXP (compare_op, 0);
22610 if (GET_MODE (flags) == CCFPmode
22611 || GET_MODE (flags) == CCFPUmode)
22614 code = ix86_fp_compare_code_to_integer (code);
/* FP comparisons must use the unordered-aware reversal.  */
22621 PUT_CODE (compare_op,
22622 reverse_condition_maybe_unordered
22623 (GET_CODE (compare_op)));
22625 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22628 mode = GET_MODE (operands[0]);
22630 /* Construct either adc or sbb insn. */
22631 if ((code == LTU) == (operands[3] == constm1_rtx))
22636 insn = gen_subqi3_carry;
22639 insn = gen_subhi3_carry;
22642 insn = gen_subsi3_carry;
22645 insn = gen_subdi3_carry;
22648 gcc_unreachable ();
22656 insn = gen_addqi3_carry;
22659 insn = gen_addhi3_carry;
22662 insn = gen_addsi3_carry;
22665 insn = gen_adddi3_carry;
22668 gcc_unreachable ();
/* dest = src +/- carry, gated on the materialized flags compare.  */
22671 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22677 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
22678 but works for floating point parameters and nonoffsetable memories.
22679 For pushes, it returns just stack offsets; the values will be saved
22680 in the right order. Maximally three parts are generated. */
/* NOTE(review): the assert below allows up to four parts and parts[3]
   is written on some paths — the "maximally three" wording above looks
   stale; verify against callers.  */
22683 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
/* Number of word-sized pieces: XFmode is 3 x 32-bit on 32-bit targets.  */
22688 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22690 size = (GET_MODE_SIZE (mode) + 4) / 8;
22692 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22693 gcc_assert (size >= 2 && size <= 4);
22695 /* Optimize constant pool reference to immediates. This is used by fp
22696 moves, that force all constants to memory to allow combining. */
22697 if (MEM_P (operand) && MEM_READONLY_P (operand))
22699 rtx tmp = maybe_get_pool_constant (operand);
22704 if (MEM_P (operand) && !offsettable_memref_p (operand))
22706 /* The only non-offsetable memories we handle are pushes. */
22707 int ok = push_operand (operand, VOIDmode);
22711 operand = copy_rtx (operand);
22712 PUT_MODE (operand, word_mode);
22713 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22717 if (GET_CODE (operand) == CONST_VECTOR)
22719 machine_mode imode = int_mode_for_mode (mode);
22720 /* Caution: if we looked through a constant pool memory above,
22721 the operand may actually have a different mode now. That's
22722 ok, since we want to pun this all the way back to an integer. */
22723 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22724 gcc_assert (operand != NULL);
/* 32-bit target path.  */
22730 if (mode == DImode)
22731 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22736 if (REG_P (operand))
/* Hard register split into consecutive SImode registers; only valid
   after reload.  */
22738 gcc_assert (reload_completed);
22739 for (i = 0; i < size; i++)
22740 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22742 else if (offsettable_memref_p (operand))
22744 operand = adjust_address (operand, SImode, 0);
22745 parts[0] = operand;
22746 for (i = 1; i < size; i++)
22747 parts[i] = adjust_address (operand, SImode, 4 * i);
22749 else if (GET_CODE (operand) == CONST_DOUBLE)
22754 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
/* Decompose the FP constant into 32-bit target words.  */
22758 real_to_target (l, &r, mode);
22759 parts[3] = gen_int_mode (l[3], SImode);
22760 parts[2] = gen_int_mode (l[2], SImode);
22763 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22764 long double may not be 80-bit. */
22765 real_to_target (l, &r, mode);
22766 parts[2] = gen_int_mode (l[2], SImode);
22769 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22772 gcc_unreachable ();
22774 parts[1] = gen_int_mode (l[1], SImode);
22775 parts[0] = gen_int_mode (l[0], SImode);
22778 gcc_unreachable ();
/* 64-bit target path: split into DImode low part plus an upper part
   whose mode depends on XFmode vs TFmode.  */
22783 if (mode == TImode)
22784 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22785 if (mode == XFmode || mode == TFmode)
22787 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22788 if (REG_P (operand))
22790 gcc_assert (reload_completed);
22791 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22792 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22794 else if (offsettable_memref_p (operand))
22796 operand = adjust_address (operand, DImode, 0);
22797 parts[0] = operand;
22798 parts[1] = adjust_address (operand, upper_mode, 8);
22800 else if (GET_CODE (operand) == CONST_DOUBLE)
22805 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22806 real_to_target (l, &r, mode);
22808 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22809 if (HOST_BITS_PER_WIDE_INT >= 64)
22812 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22813 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22816 parts[0] = immed_double_const (l[0], l[1], DImode);
22818 if (upper_mode == SImode)
22819 parts[1] = gen_int_mode (l[2], SImode);
22820 else if (HOST_BITS_PER_WIDE_INT >= 64)
22823 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22824 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22827 parts[1] = immed_double_const (l[2], l[3], DImode);
22830 gcc_unreachable ();
22837 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22838 Return false when normal moves are needed; true when all required
22839 insns have been emitted. Operands 2-4 contain the input values
22840 in the correct order; operands 5-7 contain the output values. */
/* NOTE(review): the code below fills operands[2..2+nparts-1] and
   operands[6..6+nparts-1] with nparts up to 4 — the operand ranges
   quoted above look stale; verify against the .md splitters.  */
22843 ix86_split_long_move (rtx operands[])
22848 int collisions = 0;
22849 machine_mode mode = GET_MODE (operands[0]);
22850 bool collisionparts[4];
22852 /* The DFmode expanders may ask us to move double.
22853 For 64bit target this is single move. By hiding the fact
22854 here we simplify i386.md splitters. */
22855 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22857 /* Optimize constant pool reference to immediates. This is used by
22858 fp moves, that force all constants to memory to allow combining. */
22860 if (MEM_P (operands[1])
22861 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22862 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22863 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22864 if (push_operand (operands[0], VOIDmode))
22866 operands[0] = copy_rtx (operands[0]);
22867 PUT_MODE (operands[0], word_mode);
22870 operands[0] = gen_lowpart (DImode, operands[0]);
22871 operands[1] = gen_lowpart (DImode, operands[1]);
22872 emit_move_insn (operands[0], operands[1]);
22876 /* The only non-offsettable memory we handle is push. */
22877 if (push_operand (operands[0], VOIDmode))
22880 gcc_assert (!MEM_P (operands[0])
22881 || offsettable_memref_p (operands[0]));
/* Decompose source and destination into word-sized parts.  */
22883 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22884 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22886 /* When emitting push, take care for source operands on the stack. */
22887 if (push && MEM_P (operands[1])
22888 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22890 rtx src_base = XEXP (part[1][nparts - 1], 0);
22892 /* Compensate for the stack decrement by 4. */
22893 if (!TARGET_64BIT && nparts == 3
22894 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22895 src_base = plus_constant (Pmode, src_base, 4);
22897 /* src_base refers to the stack pointer and is
22898 automatically decreased by emitted push. */
22899 for (i = 0; i < nparts; i++)
22900 part[1][i] = change_address (part[1][i],
22901 GET_MODE (part[1][i]), src_base);
22904 /* We need to do copy in the right order in case an address register
22905 of the source overlaps the destination. */
22906 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22910 for (i = 0; i < nparts; i++)
22913 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22914 if (collisionparts[i])
22918 /* Collision in the middle part can be handled by reordering. */
22919 if (collisions == 1 && nparts == 3 && collisionparts [1])
22921 std::swap (part[0][1], part[0][2]);
22922 std::swap (part[1][1], part[1][2]);
22924 else if (collisions == 1
22926 && (collisionparts [1] || collisionparts [2]))
22928 if (collisionparts [1])
22930 std::swap (part[0][1], part[0][2]);
22931 std::swap (part[1][1], part[1][2]);
22935 std::swap (part[0][2], part[0][3]);
22936 std::swap (part[1][2], part[1][3]);
22940 /* If there are more collisions, we can't handle it by reordering.
22941 Do an lea to the last part and use only one colliding move. */
22942 else if (collisions > 1)
22948 base = part[0][nparts - 1];
22950 /* Handle the case when the last part isn't valid for lea.
22951 Happens in 64-bit mode storing the 12-byte XFmode. */
22952 if (GET_MODE (base) != Pmode)
22953 base = gen_rtx_REG (Pmode, REGNO (base));
22955 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22956 part[1][0] = replace_equiv_address (part[1][0], base);
22957 for (i = 1; i < nparts; i++)
22959 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22960 part[1][i] = replace_equiv_address (part[1][i], tmp);
/* Push path: emit the parts highest-address first.  */
22971 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22972 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22973 stack_pointer_rtx, GEN_INT (-4)));
22974 emit_move_insn (part[0][2], part[1][2]);
22976 else if (nparts == 4)
22978 emit_move_insn (part[0][3], part[1][3]);
22979 emit_move_insn (part[0][2], part[1][2]);
22984 /* In 64bit mode we don't have 32bit push available. In case this is
22985 register, it is OK - we will just use larger counterpart. We also
22986 retype memory - these comes from attempt to avoid REX prefix on
22987 moving of second half of TFmode value. */
22988 if (GET_MODE (part[1][1]) == SImode)
22990 switch (GET_CODE (part[1][1]))
22993 part[1][1] = adjust_address (part[1][1], DImode, 0);
22997 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
23001 gcc_unreachable ();
23004 if (GET_MODE (part[1][0]) == SImode)
23005 part[1][0] = part[1][1];
23008 emit_move_insn (part[0][1], part[1][1]);
23009 emit_move_insn (part[0][0], part[1][0]);
23013 /* Choose correct order to not overwrite the source before it is copied. */
23014 if ((REG_P (part[0][0])
23015 && REG_P (part[1][1])
23016 && (REGNO (part[0][0]) == REGNO (part[1][1])
23018 && REGNO (part[0][0]) == REGNO (part[1][2]))
23020 && REGNO (part[0][0]) == REGNO (part[1][3]))))
23022 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Reversed order: copy high parts before low parts.  */
23024 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
23026 operands[2 + i] = part[0][j];
23027 operands[6 + i] = part[1][j];
23032 for (i = 0; i < nparts; i++)
23034 operands[2 + i] = part[0][i];
23035 operands[6 + i] = part[1][i];
23039 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
23040 if (optimize_insn_for_size_p ())
23042 for (j = 0; j < nparts - 1; j++)
23043 if (CONST_INT_P (operands[6 + j])
23044 && operands[6 + j] != const0_rtx
23045 && REG_P (operands[2 + j]))
23046 for (i = j; i < nparts - 1; i++)
23047 if (CONST_INT_P (operands[7 + i])
23048 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
23049 operands[7 + i] = operands[2 + j];
23052 for (i = 0; i < nparts; i++)
23053 emit_move_insn (operands[2 + i], operands[6 + i]);
23058 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
23059 left shift by a constant, either using a single shift or
23060 a sequence of add instructions. */
/* NOTE(review): MODE here is the double-word mode being split, so
   mode == DImode means OPERAND is an SImode half — hence the
   seemingly inverted gen_addsi3/gen_ashlsi3 selections below; the
   small-count path shifts left by repeated self-addition (x+x == x<<1)
   when that is cheaper per ix86_cost and not optimizing for size.  */
23063 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
23065 rtx (*insn)(rtx, rtx, rtx);
23068 || (count * ix86_cost->add <= ix86_cost->shift_const
23069 && !optimize_insn_for_size_p ()))
23071 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
23072 while (count-- > 0)
23073 emit_insn (insn (operand, operand, operand));
23077 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23078 emit_insn (insn (operand, operand, GEN_INT (count)));
/* Split a double-word left shift into operations on the two half-width
   parts.  OPERANDS[0] is the destination, OPERANDS[1] the source and
   OPERANDS[2] the shift count; SCRATCH, if non-NULL, is a spare register
   usable for the cmove-based adjustment.  MODE is the double-word mode
   (DImode when splitting to SImode halves).  */
23083 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
23085 rtx (*gen_ashl3)(rtx, rtx, rtx);
23086 rtx (*gen_shld)(rtx, rtx, rtx);
23087 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23089 rtx low[2], high[2];
  /* Constant shift count: the result pieces can be computed directly.  */
23092 if (CONST_INT_P (operands[2]))
23094 split_double_mode (mode, operands, 2, low, high);
23095 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
  /* Shifts of half a word or more move the (possibly shifted) low input
     part into the high result part and zero the low result part.  */
23097 if (count >= half_width)
23099 emit_move_insn (high[0], low[1]);
23100 emit_move_insn (low[0], const0_rtx);
23102 if (count > half_width)
23103 ix86_expand_ashl_const (high[0], count - half_width, mode);
  /* Small constant count: SHLD shifts bits from the low part into the
     high part, then the low part is shifted on its own.  */
23107 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23109 if (!rtx_equal_p (operands[0], operands[1]))
23110 emit_move_insn (operands[0], operands[1]);
23112 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
23113 ix86_expand_ashl_const (low[0], count, mode);
  /* Variable shift count.  */
23118 split_double_mode (mode, operands, 1, low, high);
23120 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23122 if (operands[1] == const1_rtx)
23124 /* Assuming we've chosen a QImode capable registers, then 1 << N
23125 can be done with two 32/64-bit shifts, no branches, no cmoves. */
23126 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
23128 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
  /* TEST the half-width bit of the count; the ZF-based setcc pair below
     then deposits the single 1 bit into whichever part it belongs in.  */
23130 ix86_expand_clear (low[0]);
23131 ix86_expand_clear (high[0]);
23132 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
23134 d = gen_lowpart (QImode, low[0]);
23135 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23136 s = gen_rtx_EQ (QImode, flags, const0_rtx);
23137 emit_insn (gen_rtx_SET (VOIDmode, d, s));
23139 d = gen_lowpart (QImode, high[0]);
23140 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23141 s = gen_rtx_NE (QImode, flags, const0_rtx);
23142 emit_insn (gen_rtx_SET (VOIDmode, d, s));
23145 /* Otherwise, we can get the same results by manually performing
23146 a bit extract operation on bit 5/6, and then performing the two
23147 shifts. The two methods of getting 0/1 into low/high are exactly
23148 the same size. Avoiding the shift in the bit extract case helps
23149 pentium4 a bit; no one else seems to care much either way. */
23152 machine_mode half_mode;
23153 rtx (*gen_lshr3)(rtx, rtx, rtx);
23154 rtx (*gen_and3)(rtx, rtx, rtx);
23155 rtx (*gen_xor3)(rtx, rtx, rtx);
  /* NOTE(review): BITS is assigned on a line not visible in this view;
     presumably the log2 of HALF_WIDTH (5 for SImode halves, 6 for
     DImode halves) per the comment above — confirm against full file.  */
23156 HOST_WIDE_INT bits;
23159 if (mode == DImode)
23161 half_mode = SImode;
23162 gen_lshr3 = gen_lshrsi3;
23163 gen_and3 = gen_andsi3;
23164 gen_xor3 = gen_xorsi3;
23169 half_mode = DImode;
23170 gen_lshr3 = gen_lshrdi3;
23171 gen_and3 = gen_anddi3;
23172 gen_xor3 = gen_xordi3;
  /* Avoid a partial-register stall from reading the QImode count as a
     wider value: use an explicit zero-extend on affected targets.  */
23176 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
23177 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
23179 x = gen_lowpart (half_mode, operands[2]);
23180 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
  /* high = (count >> bits) & 1; low = high ^ 1 — i.e. exactly one of
     the two parts receives the 1 bit before the final shifts.  */
23182 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
23183 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
23184 emit_move_insn (low[0], high[0]);
23185 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
23188 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23189 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
23193 if (operands[1] == constm1_rtx)
23195 /* For -1 << N, we can avoid the shld instruction, because we
23196 know that we're shifting 0...31/63 ones into a -1. */
23197 emit_move_insn (low[0], constm1_rtx);
23198 if (optimize_insn_for_size_p ())
23199 emit_move_insn (high[0], low[0])
23201 emit_move_insn (high[0], constm1_rtx);
  /* General case: SHLD for the high part, plain shift for the low part,
     then fix up for counts >= half_width (hardware shifts mask the
     count) either with cmove (adj_1) or with a branch (adj_2).  */
23205 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23207 if (!rtx_equal_p (operands[0], operands[1]))
23208 emit_move_insn (operands[0], operands[1]);
23210 split_double_mode (mode, operands, 1, low, high);
23211 emit_insn (gen_shld (high[0], low[0], operands[2]));
23214 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23216 if (TARGET_CMOVE && scratch)
23218 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23219 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23221 ix86_expand_clear (scratch);
23222 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
23226 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23227 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23229 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift into operations on the two
   half-width parts.  OPERANDS[0] is the destination, OPERANDS[1] the
   source and OPERANDS[2] the shift count; SCRATCH, if non-NULL, enables
   the cmove-based count adjustment.  */
23234 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
23236 rtx (*gen_ashr3)(rtx, rtx, rtx)
23237 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
23238 rtx (*gen_shrd)(rtx, rtx, rtx);
23239 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23241 rtx low[2], high[2];
23244 if (CONST_INT_P (operands[2]))
23246 split_double_mode (mode, operands, 2, low, high);
23247 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
  /* Shift by width-1: both result parts become the sign-bit fill of the
     high input part.  */
23249 if (count == GET_MODE_BITSIZE (mode) - 1)
23251 emit_move_insn (high[0], high[1]);
23252 emit_insn (gen_ashr3 (high[0], high[0],
23253 GEN_INT (half_width - 1)));
23254 emit_move_insn (low[0], high[0]);
  /* Count >= half width: low result is the high input shifted by the
     remainder; high result is the sign fill of the high input.  */
23257 else if (count >= half_width)
23259 emit_move_insn (low[0], high[1]);
23260 emit_move_insn (high[0], low[0]);
23261 emit_insn (gen_ashr3 (high[0], high[0],
23262 GEN_INT (half_width - 1)));
23264 if (count > half_width)
23265 emit_insn (gen_ashr3 (low[0], low[0],
23266 GEN_INT (count - half_width)));
  /* Small constant count: SHRD shifts high-part bits into the low part,
     then the high part is arithmetically shifted on its own.  */
23270 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23272 if (!rtx_equal_p (operands[0], operands[1]))
23273 emit_move_insn (operands[0], operands[1]);
23275 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23276 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
  /* Variable count: emit the SHRD/SAR pair, then adjust for counts that
     the hardware masks (>= half_width), preferring cmove when SCRATCH
     is available (the scratch holds the sign fill).  */
23281 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23283 if (!rtx_equal_p (operands[0], operands[1]))
23284 emit_move_insn (operands[0], operands[1]);
23286 split_double_mode (mode, operands, 1, low, high);
23288 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23289 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
23291 if (TARGET_CMOVE && scratch)
23293 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23294 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23296 emit_move_insn (scratch, high[0]);
23297 emit_insn (gen_ashr3 (scratch, scratch,
23298 GEN_INT (half_width - 1)));
23299 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23304 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
23305 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
23307 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a double-word logical right shift into operations on the two
   half-width parts.  Same operand conventions as ix86_split_ashr, but
   vacated bits are zero-filled rather than sign-filled.  */
23313 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
23315 rtx (*gen_lshr3)(rtx, rtx, rtx)
23316 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
23317 rtx (*gen_shrd)(rtx, rtx, rtx);
23318 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23320 rtx low[2], high[2];
23323 if (CONST_INT_P (operands[2]))
23325 split_double_mode (mode, operands, 2, low, high);
23326 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
  /* Count >= half width: low result is the (possibly shifted) high
     input part; high result is simply zero.  */
23328 if (count >= half_width)
23330 emit_move_insn (low[0], high[1]);
23331 ix86_expand_clear (high[0]);
23333 if (count > half_width)
23334 emit_insn (gen_lshr3 (low[0], low[0],
23335 GEN_INT (count - half_width)));
  /* Small constant count: SHRD moves high-part bits into the low part,
     then the high part is logically shifted on its own.  */
23339 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23341 if (!rtx_equal_p (operands[0], operands[1]))
23342 emit_move_insn (operands[0], operands[1]);
23344 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23345 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
  /* Variable count: SHRD/SHR pair plus a fixup for masked counts —
     cmove-based (adj_1, with a zeroed scratch) when possible, otherwise
     the branching adj_2 variant.  */
23350 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23352 if (!rtx_equal_p (operands[0], operands[1]))
23353 emit_move_insn (operands[0], operands[1]);
23355 split_double_mode (mode, operands, 1, low, high);
23357 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23358 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
23360 if (TARGET_CMOVE && scratch)
23362 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23363 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23365 ix86_expand_clear (scratch);
23366 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23371 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23372 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23374 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
23379 /* Predict just emitted jump instruction to be taken with probability PROB. */
  /* PROB is expressed in REG_BR_PROB_BASE units (see the callers, which
     pass e.g. REG_BR_PROB_BASE * 90 / 100).  The last emitted insn must
     be a jump; this attaches a REG_BR_PROB note to it.  */
23381 predict_jump (int prob)
23383 rtx insn = get_last_insn ();
23384 gcc_assert (JUMP_P (insn));
23385 add_int_reg_note (insn, REG_BR_PROB, prob);
23388 /* Helper function for the string operations below.  Test whether VARIABLE
23389 is aligned to VALUE bytes (VALUE must be a power of two).  If
   (VARIABLE & VALUE) == 0 the emitted code jumps to the returned label.
   EPILOGUE selects the branch prediction: 50% taken in epilogues, 90%
   taken otherwise.  The caller emits the label and sets LABEL_NUSES.  */
23390 static rtx_code_label *
23391 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
23393 rtx_code_label *label = gen_label_rtx ();
23394 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
23395 if (GET_MODE (variable) == DImode)
23396 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
23398 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
23399 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
23402 predict_jump (REG_BR_PROB_BASE * 50 / 100);
23404 predict_jump (REG_BR_PROB_BASE * 90 / 100);
23408 /* Adjust COUNTER by the VALUE. */
  /* Emits COUNTREG -= VALUE (an add of -VALUE), choosing the add pattern
     matching COUNTREG's mode.  */
23410 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
23412 rtx (*gen_add)(rtx, rtx, rtx)
23413 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
23415 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
23418 /* Zero extend possibly SImode EXP to Pmode register. */
  /* Returns a fresh Pmode pseudo holding the zero-extended value.  */
23420 ix86_zero_extend_to_Pmode (rtx exp)
23422 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
23425 /* Divide COUNTREG by SCALE. */
  /* For a constant count the division is folded at compile time; for a
     register, SCALE must be a power of two since the division is done
     with a logical right shift by exact_log2 (SCALE).  */
23427 scale_counter (rtx countreg, int scale)
23433 if (CONST_INT_P (countreg))
23434 return GEN_INT (INTVAL (countreg) / scale);
23435 gcc_assert (REG_P (countreg));
23437 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
23438 GEN_INT (exact_log2 (scale)),
23439 NULL, 1, OPTAB_DIRECT);
23443 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
23444 DImode for constant loop counts. */
23446 static machine_mode
23447 counter_mode (rtx count_exp)
23449 if (GET_MODE (count_exp) != VOIDmode)
23450 return GET_MODE (count_exp);
  /* NOTE(review): the return statements for the two cases below fall on
     lines not visible in this view; presumably DImode for non-constant /
     wide constants and SImode otherwise, per the comment above.  */
23451 if (!CONST_INT_P (count_exp))
23453 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
23458 /* Copy the address to a Pmode register. This is used for x32 to
23459 truncate DImode TLS address to a SImode register. */
23462 ix86_copy_addr_to_reg (rtx addr)
23464 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
23465 return copy_addr_to_reg (addr);
  /* x32 only: ADDR is DImode while Pmode is SImode — copy to a DImode
     register and return its low SImode subreg.  */
23468 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
23469 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
23473 /* When ISSETMEM is FALSE, output simple loop to move memory pointer to SRCPTR
23474 to DESTPTR via chunks of MODE unrolled UNROLL times, overall size is COUNT
23475 specified in bytes. When ISSETMEM is TRUE, output the equivalent loop to set
23476 memory by VALUE (supposed to be in MODE).
23478 The size is rounded down to whole number of chunk size moved at once.
23479 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
23483 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
23484 rtx destptr, rtx srcptr, rtx value,
23485 rtx count, machine_mode mode, int unroll,
23486 int expected_size, bool issetmem)
23488 rtx_code_label *out_label, *top_label;
23490 machine_mode iter_mode = counter_mode (count);
23491 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
23492 rtx piece_size = GEN_INT (piece_size_n);
23493 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
23497 top_label = gen_label_rtx ();
23498 out_label = gen_label_rtx ();
23499 iter = gen_reg_rtx (iter_mode);
  /* SIZE = COUNT rounded down to a multiple of the per-iteration chunk.  */
23501 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
23502 NULL, 1, OPTAB_DIRECT);
23503 /* Those two should combine. */
  /* Skip the loop entirely when the rounded size is zero; the skip is
     predicted unlikely (10%).  */
23504 if (piece_size == const1_rtx)
23506 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23508 predict_jump (REG_BR_PROB_BASE * 10 / 100);
23510 emit_move_insn (iter, const0_rtx);
23512 emit_label (top_label);
23514 tmp = convert_modes (Pmode, iter_mode, iter, true);
23516 /* This assert could be relaxed - in this case we'll need to compute
23517 smallest power of two, containing in PIECE_SIZE_N and pass it to
23519 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
23520 destmem = offset_address (destmem, tmp, piece_size_n);
23521 destmem = adjust_address (destmem, mode, 0);
23525 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23526 srcmem = adjust_address (srcmem, mode, 0);
23528 /* When unrolling for chips that reorder memory reads and writes,
23529 we can save registers by using single temporary.
23530 Also using 4 temporaries is overkill in 32bit mode. */
  /* NOTE(review): this branch is deliberately disabled by the "&& 0";
     the temporaries-based path below is always used for moves.  */
23531 if (!TARGET_64BIT && 0)
23533 for (i = 0; i < unroll; i++)
23538 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23540 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23542 emit_move_insn (destmem, srcmem);
  /* Move path: load all UNROLL chunks into temporaries first, then
     store them, so reads and writes are not interleaved.  */
23548 gcc_assert (unroll <= 4);
23549 for (i = 0; i < unroll; i++)
23551 tmpreg[i] = gen_reg_rtx (mode);
23555 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23557 emit_move_insn (tmpreg[i], srcmem);
23559 for (i = 0; i < unroll; i++)
23564 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23566 emit_move_insn (destmem, tmpreg[i]);
  /* Set path: store VALUE into each of the UNROLL chunks.  */
23571 for (i = 0; i < unroll; i++)
23575 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23576 emit_move_insn (destmem, value);
  /* Advance the iteration counter and loop while ITER < SIZE.  */
23579 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23580 true, OPTAB_LIB_WIDEN);
23582 emit_move_insn (iter, tmp);
23584 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
  /* Derive the back-edge probability from the caller's EXPECTED_SIZE
     (number of expected iterations); default to 80% when unknown.  */
23586 if (expected_size != -1)
23588 expected_size /= GET_MODE_SIZE (mode) * unroll;
23589 if (expected_size == 0)
23591 else if (expected_size > REG_BR_PROB_BASE)
23592 predict_jump (REG_BR_PROB_BASE - 1);
23594 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23597 predict_jump (REG_BR_PROB_BASE * 80 / 100);
  /* Finally advance the caller's pointers past the copied region.  */
23598 iter = ix86_zero_extend_to_Pmode (iter);
23599 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23600 true, OPTAB_LIB_WIDEN);
23601 if (tmp != destptr)
23602 emit_move_insn (destptr, tmp);
23605 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23606 true, OPTAB_LIB_WIDEN);
23608 emit_move_insn (srcptr, tmp);
23610 emit_label (out_label);
23613 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
23614 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23615 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
23616 For setmem case, VALUE is a promoted to a wider size ORIG_VALUE.
23617 ORIG_VALUE is the original value passed to memset to fill the memory with.
23618 Other arguments have same meaning as for previous function. */
23621 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23622 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23624 machine_mode mode, bool issetmem)
23629 HOST_WIDE_INT rounded_count;
23631 /* If possible, it is shorter to use rep movs.
23632 TODO: Maybe it is better to move this logic to decide_alg. */
  /* A QImode rep with a count divisible by 4 can be done as an SImode
     rep instead (fewer iterations); for memset this only works when the
     fill value is zero, since VALUE would otherwise need re-promotion.  */
23633 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23634 && (!issetmem || orig_value == const0_rtx))
23637 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23638 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  /* COUNTREG counts chunks of GET_MODE_SIZE (MODE), not bytes.  */
23640 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23641 GET_MODE_SIZE (mode)));
  /* DESTEXP expresses the final destination pointer value
     (DESTPTR + COUNTREG << log2 (chunk size)) for the rep pattern.  */
23642 if (mode != QImode)
23644 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23645 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23646 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23649 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
  /* Propagate the exact (rounded-down) size to the MEM for aliasing;
     otherwise drop any stale size info.  */
23650 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23652 rounded_count = (INTVAL (count)
23653 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23654 destmem = shallow_copy_rtx (destmem);
23655 set_mem_size (destmem, rounded_count);
23657 else if (MEM_SIZE_KNOWN_P (destmem))
23658 clear_mem_size (destmem);
  /* Setmem: emit rep;stos with the fill value narrowed to MODE.  */
23662 value = force_reg (mode, gen_lowpart (mode, value));
23663 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
  /* Movmem: build the matching source expression and emit rep;mov.  */
23667 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23668 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23669 if (mode != QImode)
23671 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23672 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23673 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23676 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23677 if (CONST_INT_P (count))
23679 rounded_count = (INTVAL (count)
23680 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23681 srcmem = shallow_copy_rtx (srcmem);
23682 set_mem_size (srcmem, rounded_count);
23686 if (MEM_SIZE_KNOWN_P (srcmem))
23687 clear_mem_size (srcmem);
23689 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23694 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23696 SRC is passed by pointer to be updated on return.
23697 Return value is updated DST. */
23699 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23700 HOST_WIDE_INT size_to_move)
23702 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23703 enum insn_code code;
23704 machine_mode move_mode;
23707 /* Find the widest mode in which we could perform moves.
23708 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
23709 it until move of such size is supported. */
23710 piece_size = 1 << floor_log2 (size_to_move);
23711 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23712 code = optab_handler (mov_optab, move_mode);
23713 while (code == CODE_FOR_nothing && piece_size > 1)
23716 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23717 code = optab_handler (mov_optab, move_mode);
23720 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23721 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
  /* Integer modes wider than a word must be done as vectors of words;
     fall back to word-sized scalar moves if no such vector move exists.  */
23722 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23724 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23725 move_mode = mode_for_vector (word_mode, nunits);
23726 code = optab_handler (mov_optab, move_mode);
23727 if (code == CODE_FOR_nothing)
23729 move_mode = word_mode;
23730 piece_size = GET_MODE_SIZE (move_mode);
23731 code = optab_handler (mov_optab, move_mode);
23734 gcc_assert (code != CODE_FOR_nothing);
23736 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23737 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23739 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23740 gcc_assert (size_to_move % piece_size == 0);
23741 adjust = GEN_INT (piece_size);
23742 for (i = 0; i < size_to_move; i += piece_size)
23744 /* We move from memory to memory, so we'll need to do it via
23745 a temporary register. */
23746 tempreg = gen_reg_rtx (move_mode);
23747 emit_insn (GEN_FCN (code) (tempreg, src));
23748 emit_insn (GEN_FCN (code) (dst, tempreg));
  /* Advance both pointer registers and rebase the MEMs on them.  */
23750 emit_move_insn (destptr,
23751 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23753 emit_move_insn (srcptr,
23753 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23755 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23757 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23761 /* Update DST and SRC rtx. */
23766 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23768 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23769 rtx destptr, rtx srcptr, rtx count, int max_size)
  /* Constant count: the residue is known, so emit straight-line moves
     for each power-of-two component of the residue.  */
23772 if (CONST_INT_P (count))
23774 HOST_WIDE_INT countval = INTVAL (count);
23775 HOST_WIDE_INT epilogue_size = countval % max_size;
23778 /* For now MAX_SIZE should be a power of 2. This assert could be
23779 relaxed, but it'll require a bit more complicated epilogue
23781 gcc_assert ((max_size & (max_size - 1)) == 0);
23782 for (i = max_size; i >= 1; i >>= 1)
23784 if (epilogue_size & i)
23785 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
  /* Large variable residue: fall back to a byte-copy loop.  */
23791 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23792 count, 1, OPTAB_DIRECT);
23793 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23794 count, QImode, 1, 4, false);
23798 /* When there are stringops, we can cheaply increase dest and src pointers.
23799 Otherwise we save code size by maintaining offset (zero is readily
23800 available from preceding rep operation) and using x86 addressing modes.
  /* Small variable residue, stringop path: test each bit of COUNT
     (4/2/1) and conditionally emit a movs of that width; movs advances
     both pointers as a side effect.  */
23802 if (TARGET_SINGLE_STRINGOP)
23806 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23807 src = change_address (srcmem, SImode, srcptr);
23808 dest = change_address (destmem, SImode, destptr);
23809 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23810 emit_label (label);
23811 LABEL_NUSES (label) = 1;
23815 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23816 src = change_address (srcmem, HImode, srcptr);
23817 dest = change_address (destmem, HImode, destptr);
23818 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23819 emit_label (label);
23820 LABEL_NUSES (label) = 1;
23824 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23825 src = change_address (srcmem, QImode, srcptr);
23826 dest = change_address (destmem, QImode, destptr);
23827 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23828 emit_label (label);
23829 LABEL_NUSES (label) = 1;
  /* Non-stringop path: keep the pointers fixed and track a running
     OFFSET register, using reg+reg addressing for each partial move.  */
23834 rtx offset = force_reg (Pmode, const0_rtx);
23839 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23840 src = change_address (srcmem, SImode, srcptr);
23841 dest = change_address (destmem, SImode, destptr);
23842 emit_move_insn (dest, src);
23843 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23844 true, OPTAB_LIB_WIDEN);
23846 emit_move_insn (offset, tmp);
23847 emit_label (label);
23848 LABEL_NUSES (label) = 1;
23852 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23853 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23854 src = change_address (srcmem, HImode, tmp);
23855 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23856 dest = change_address (destmem, HImode, tmp);
23857 emit_move_insn (dest, src);
23858 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23859 true, OPTAB_LIB_WIDEN);
23861 emit_move_insn (offset, tmp);
23862 emit_label (label);
23863 LABEL_NUSES (label) = 1;
23867 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23868 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23869 src = change_address (srcmem, QImode, tmp);
23870 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23871 dest = change_address (destmem, QImode, tmp);
23872 emit_move_insn (dest, src);
23873 emit_label (label);
23874 LABEL_NUSES (label) = 1;
23879 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23880 with value PROMOTED_VAL.
23881 SRC is passed by pointer to be updated on return.
23882 Return value is updated DST. */
23884 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23885 HOST_WIDE_INT size_to_move)
23887 rtx dst = destmem, adjust;
23888 enum insn_code code;
23889 machine_mode move_mode;
23892 /* Find the widest mode in which we could perform moves.
23893 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
23894 it until move of such size is supported. */
23895 move_mode = GET_MODE (promoted_val);
23896 if (move_mode == VOIDmode)
23897 move_mode = QImode;
  /* If the promoted value is wider than what we need, narrow both the
     mode and the value to SIZE_TO_MOVE bytes.  */
23898 if (size_to_move < GET_MODE_SIZE (move_mode))
23900 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23901 promoted_val = gen_lowpart (move_mode, promoted_val);
23903 piece_size = GET_MODE_SIZE (move_mode);
23904 code = optab_handler (mov_optab, move_mode);
23905 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23907 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23909 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23910 gcc_assert (size_to_move % piece_size == 0);
23911 adjust = GEN_INT (piece_size);
23912 for (i = 0; i < size_to_move; i += piece_size)
  /* Word-sized-or-smaller pieces use the strset pattern, which also
     advances DESTPTR; wider (vector) pieces need an explicit store
     followed by a manual pointer bump.  */
23914 if (piece_size <= GET_MODE_SIZE (word_mode))
23916 emit_insn (gen_strset (destptr, dst, promoted_val));
23917 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23922 emit_insn (GEN_FCN (code) (dst, promoted_val));
23924 emit_move_insn (destptr,
23925 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23927 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23931 /* Update DST rtx. */
23934 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
  /* Masks COUNT down to the residue and delegates to the generic
     byte-at-a-time set loop.  */
23936 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23937 rtx count, int max_size)
23940 expand_simple_binop (counter_mode (count), AND, count,
23941 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23942 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23943 gen_lowpart (QImode, value), count, QImode,
23944 1, max_size / 2, true);
23947 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
  /* VALUE is the scalar promoted fill value; VEC_VALUE, if non-NULL, is
     a wider vector promotion used for pieces larger than VALUE's mode.  */
23949 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23950 rtx count, int max_size)
  /* Constant count: emit straight-line stores for each power-of-two
     component of the residue.  */
23954 if (CONST_INT_P (count))
23956 HOST_WIDE_INT countval = INTVAL (count);
23957 HOST_WIDE_INT epilogue_size = countval % max_size;
23960 /* For now MAX_SIZE should be a power of 2. This assert could be
23961 relaxed, but it'll require a bit more complicated epilogue
23963 gcc_assert ((max_size & (max_size - 1)) == 0);
23964 for (i = max_size; i >= 1; i >>= 1)
23966 if (epilogue_size & i)
23968 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23969 destmem = emit_memset (destmem, destptr, vec_value, i);
23971 destmem = emit_memset (destmem, destptr, value, i);
  /* Large variable residue: use the loop-based epilogue instead.  */
23978 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
  /* Small variable residue: test each bit of COUNT (16/8/4/2/1) and
     conditionally store that many bytes via strset, which advances
     DESTPTR.  The 16- and 8-byte cases use DImode pairs when available
     and SImode sequences otherwise.  */
23983 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
23986 dest = change_address (destmem, DImode, destptr);
23987 emit_insn (gen_strset (destptr, dest, value));
23988 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23989 emit_insn (gen_strset (destptr, dest, value));
23993 dest = change_address (destmem, SImode, destptr);
23994 emit_insn (gen_strset (destptr, dest, value));
23995 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23996 emit_insn (gen_strset (destptr, dest, value));
23997 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23998 emit_insn (gen_strset (destptr, dest, value));
23999 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
24000 emit_insn (gen_strset (destptr, dest, value));
24002 emit_label (label);
24003 LABEL_NUSES (label) = 1;
24007 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
24010 dest = change_address (destmem, DImode, destptr);
24011 emit_insn (gen_strset (destptr, dest, value));
24015 dest = change_address (destmem, SImode, destptr);
24016 emit_insn (gen_strset (destptr, dest, value));
24017 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
24018 emit_insn (gen_strset (destptr, dest, value));
24020 emit_label (label);
24021 LABEL_NUSES (label) = 1;
24025 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
24026 dest = change_address (destmem, SImode, destptr);
24027 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
24028 emit_label (label);
24029 LABEL_NUSES (label) = 1;
24033 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
24034 dest = change_address (destmem, HImode, destptr);
24035 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
24036 emit_label (label);
24037 LABEL_NUSES (label) = 1;
24041 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
24042 dest = change_address (destmem, QImode, destptr);
24043 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
24044 emit_label (label);
24045 LABEL_NUSES (label) = 1;
24049 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
24050 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
24051 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
24053 Return value is updated DESTMEM. */
24055 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
24056 rtx destptr, rtx srcptr, rtx value,
24057 rtx vec_value, rtx count, int align,
24058 int desired_alignment, bool issetmem)
  /* For each power of two below DESIRED_ALIGNMENT, conditionally process
     I bytes when DESTPTR has bit I set (i.e. is misaligned at that
     level), then decrement COUNT accordingly.  */
24061 for (i = 1; i < desired_alignment; i <<= 1)
24065 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
24068 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
24069 destmem = emit_memset (destmem, destptr, vec_value, i);
24071 destmem = emit_memset (destmem, destptr, value, i);
24074 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
24075 ix86_adjust_counter (count, i);
24076 emit_label (label);
24077 LABEL_NUSES (label) = 1;
  /* After the conditional step, DESTPTR is known aligned to 2*I.  */
24078 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
24084 /* Test if COUNT&SIZE is nonzero and if so, expand a movmem
24085 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
24086 and jump to DONE_LABEL. */
  /* The trick: unconditionally copy/set SIZE bytes from the start and
     SIZE bytes ending exactly at DESTPTR+COUNT; the two (possibly
     overlapping) stores cover any length in SIZE..2*SIZE-1.  */
24088 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
24089 rtx destptr, rtx srcptr,
24090 rtx value, rtx vec_value,
24091 rtx count, int size,
24092 rtx done_label, bool issetmem)
24094 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
24095 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
24099 /* If we do not have vector value to copy, we must reduce size. */
24104 if (GET_MODE (value) == VOIDmode && size > 8)
24106 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
24107 mode = GET_MODE (value);
24110 mode = GET_MODE (vec_value), value = vec_value;
24114 /* Choose appropriate vector mode. */
24116 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
24117 else if (size >= 16)
24118 mode = TARGET_SSE ? V16QImode : DImode;
24119 srcmem = change_address (srcmem, mode, srcptr);
24121 destmem = change_address (destmem, mode, destptr);
24122 modesize = GEN_INT (GET_MODE_SIZE (mode));
24123 gcc_assert (GET_MODE_SIZE (mode) <= size);
  /* First pass: cover the leading SIZE bytes.  */
24124 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24127 emit_move_insn (destmem, gen_lowpart (mode, value));
24130 emit_move_insn (destmem, srcmem);
24131 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24133 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
  /* Rebase to DESTPTR + COUNT - 2*SIZE (we already advanced by SIZE)
     so the second pass ends exactly at DESTPTR + COUNT.  */
24136 destmem = offset_address (destmem, count, 1);
24137 destmem = offset_address (destmem, GEN_INT (-2 * size),
24138 GET_MODE_SIZE (mode));
24141 srcmem = offset_address (srcmem, count, 1);
24142 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
24143 GET_MODE_SIZE (mode));
  /* Second pass: cover the trailing SIZE bytes.  */
24145 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24148 emit_move_insn (destmem, gen_lowpart (mode, value));
24151 emit_move_insn (destmem, srcmem);
24152 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24154 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24156 emit_jump_insn (gen_jump (done_label));
24159 emit_label (label);
24160 LABEL_NUSES (label) = 1;
24163 /* Handle small memcpy (up to SIZE, which is supposed to be a small power of 2)
24164 and get ready for the main memcpy loop by copying initial DESIRED_ALIGN-ALIGN
24165 bytes and last SIZE bytes adjusting DESTPTR/SRCPTR/COUNT in a way we can
24166 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
24167 DONE_LABEL is a label after the whole copying sequence. The label is created
24168 on demand if *DONE_LABEL is NULL.
24169 MIN_SIZE is minimal size of block copied. This value gets adjusted for new
24170 bounds after the initial copies.
24172 DESTMEM/SRCMEM are memory expressions pointing to the copies block,
24173 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicate whether
24174 we will dispatch to a library call for large blocks.
24176 In pseudocode we do:
24180 Assume that SIZE is 4. Bigger sizes are handled analogously
24183 copy 4 bytes from SRCPTR to DESTPTR
24184 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
24189 copy 1 byte from SRCPTR to DESTPTR
24192 copy 2 bytes from SRCPTR to DESTPTR
24193 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
24198 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
24199 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
24201 OLD_DESTPTR = DESTPTR;
24202 Align DESTPTR up to DESIRED_ALIGN
24203 SRCPTR += DESTPTR - OLD_DESTPTR
24204 COUNT -= DEST_PTR - OLD_DESTPTR
24206 Round COUNT down to multiple of SIZE
24207 << optional caller supplied zero size guard is here >>
24208 << optional caller supplied dynamic check is here >>
24209 << caller supplied main copy loop is here >>
24214 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
24215 rtx *destptr, rtx *srcptr,
24217 rtx value, rtx vec_value,
24219 rtx_code_label **done_label,
24223 unsigned HOST_WIDE_INT *min_size,
24224 bool dynamic_check,
24227 rtx_code_label *loop_label = NULL, *label;
24230 int prolog_size = 0;
24233 /* Choose proper value to copy: a vector value for vector-mode memset,
    the scalar promoted value otherwise.  */
24234 if (issetmem && VECTOR_MODE_P (mode))
24235 mode_value = vec_value;
24237 mode_value = value;
24238 gcc_assert (GET_MODE_SIZE (mode) <= size)
24240 /* See if block is big or small, handle small blocks. */
24241 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
24244 loop_label = gen_label_rtx ();
24247 *done_label = gen_label_rtx ();
24249 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
24253 /* Handle sizes > 3. */
24254 for (;size2 > 2; size2 >>= 1)
24255 expand_small_movmem_or_setmem (destmem, srcmem,
24259 size2, *done_label, issetmem);
24260 /* Nothing to copy? Jump to DONE_LABEL if so */
24261 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
24264 /* Do a byte copy. */
24265 destmem = change_address (destmem, QImode, *destptr);
24267 emit_move_insn (destmem, gen_lowpart (QImode, value));
24270 srcmem = change_address (srcmem, QImode, *srcptr);
24271 emit_move_insn (destmem, srcmem);
24274 /* Handle sizes 2 and 3: one HImode move at the start of the block and
    one HImode move ending exactly at *COUNT bytes (they overlap for 3).  */
24275 label = ix86_expand_aligntest (*count, 2, false);
24276 destmem = change_address (destmem, HImode, *destptr);
24277 destmem = offset_address (destmem, *count, 1);
24278 destmem = offset_address (destmem, GEN_INT (-2), 2);
24280 emit_move_insn (destmem, gen_lowpart (HImode, value));
24283 srcmem = change_address (srcmem, HImode, *srcptr);
24284 srcmem = offset_address (srcmem, *count, 1);
24285 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
24286 emit_move_insn (destmem, srcmem);
24289 emit_label (label);
24290 LABEL_NUSES (label) = 1;
24291 emit_jump_insn (gen_jump (*done_label));
24295 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
24296 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
24298 /* Start memcpy for COUNT >= SIZE. */
24301 emit_label (loop_label);
24302 LABEL_NUSES (loop_label) = 1;
24305 /* Copy first desired_align bytes. */
24307 srcmem = change_address (srcmem, mode, *srcptr);
24308 destmem = change_address (destmem, mode, *destptr);
24309 modesize = GEN_INT (GET_MODE_SIZE (mode));
24310 for (n = 0; prolog_size < desired_align - align; n++)
24313 emit_move_insn (destmem, mode_value);
24316 emit_move_insn (destmem, srcmem);
24317 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24319 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24320 prolog_size += GET_MODE_SIZE (mode);
24324 /* Copy last SIZE bytes. */
24325 destmem = offset_address (destmem, *count, 1);
24326 destmem = offset_address (destmem,
24327 GEN_INT (-size - prolog_size),
24330 emit_move_insn (destmem, mode_value);
24333 srcmem = offset_address (srcmem, *count, 1);
24334 srcmem = offset_address (srcmem,
24335 GEN_INT (-size - prolog_size),
24337 emit_move_insn (destmem, srcmem);
24339 /* Finish the last SIZE-byte chunk one MODE-sized piece at a time.  */
24339 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
24341 destmem = offset_address (destmem, modesize, 1);
24343 emit_move_insn (destmem, mode_value);
24346 srcmem = offset_address (srcmem, modesize, 1);
24347 emit_move_insn (destmem, srcmem);
24351 /* Align destination. */
24352 if (desired_align > 1 && desired_align > align)
24354 rtx saveddest = *destptr;
24356 gcc_assert (desired_align <= size);
24357 /* Align destptr up, place it to new register. */
24358 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
24359 GEN_INT (prolog_size),
24360 NULL_RTX, 1, OPTAB_DIRECT);
24361 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
24362 GEN_INT (-desired_align),
24363 *destptr, 1, OPTAB_DIRECT);
24364 /* See how many bytes we skipped. */
24365 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
24367 saveddest, 1, OPTAB_DIRECT);
24368 /* Adjust srcptr and count. */
24370 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
24371 *srcptr, 1, OPTAB_DIRECT);
24372 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24373 saveddest, *count, 1, OPTAB_DIRECT);
24374 /* We copied at most size + prolog_size. */
24375 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
24376 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
24380 /* Our loops always round down the block size, but for dispatch to library
24381 we need precise value. */
24383 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
24384 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
24388 gcc_assert (prolog_size == 0);
24389 /* Decrease count, so we won't end up copying last word twice. */
24390 if (!CONST_INT_P (*count))
24391 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24392 constm1_rtx, *count, 1, OPTAB_DIRECT);
24394 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
24396 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
24401 /* This function is like the previous one, except here we know how many bytes
24402 need to be copied. That allows us to update alignment not only of DST, which
24403 is returned, but also of SRC, which is passed as a pointer for that
24406 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
24407 rtx srcreg, rtx value, rtx vec_value,
24408 int desired_align, int align_bytes,
24412 rtx orig_dst = dst;
24413 rtx orig_src = NULL;
24414 int piece_size = 1;
24415 int copied_bytes = 0;
24419 gcc_assert (srcp != NULL);
24424 /* Emit sets/moves in increasing power-of-two pieces: each bit set in
    ALIGN_BYTES selects one piece of that size, until exactly ALIGN_BYTES
    bytes have been emitted.  */
24424 for (piece_size = 1;
24425 piece_size <= desired_align && copied_bytes < align_bytes;
24428 if (align_bytes & piece_size)
24432 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
24433 dst = emit_memset (dst, destreg, vec_value, piece_size);
24435 dst = emit_memset (dst, destreg, value, piece_size);
24438 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
24439 copied_bytes += piece_size;
24442 /* DST is now aligned to DESIRED_ALIGN; record that in the MEM.  */
24442 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
24443 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24444 if (MEM_SIZE_KNOWN_P (orig_dst))
24445 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
24449 /* Derive the best provable alignment for SRC after the prologue.
    NOTE(review): SRC is presumably initialized from *SRCP in lines not
    visible in this excerpt — confirm against the full file.  */
24449 int src_align_bytes = get_mem_align_offset (src, desired_align
24451 if (src_align_bytes >= 0)
24452 src_align_bytes = desired_align - src_align_bytes;
24453 if (src_align_bytes >= 0)
24455 unsigned int src_align;
24456 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
24458 if ((src_align_bytes & (src_align - 1))
24459 == (align_bytes & (src_align - 1)))
24462 if (src_align > (unsigned int) desired_align)
24463 src_align = desired_align;
24464 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
24465 set_mem_align (src, src_align * BITS_PER_UNIT);
24467 if (MEM_SIZE_KNOWN_P (orig_src))
24468 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
24475 /* Return true if ALG can be used in current context.
24476 Assume we expand memset if MEMSET is true. */
24478 alg_usable_p (enum stringop_alg alg, bool memset)
24480 if (alg == no_stringop)
24482 /* vector_loop requires SSE or AVX to be enabled.  */
24482 if (alg == vector_loop)
24483 return TARGET_SSE || TARGET_AVX;
24484 /* Algorithms using the rep prefix want at least edi and ecx;
24485 additionally, memset wants eax and memcpy wants esi. Don't
24486 consider such algorithms if the user has appropriated those
24487 registers for their own purposes. */
24488 if (alg == rep_prefix_1_byte
24489 || alg == rep_prefix_4_byte
24490 || alg == rep_prefix_8_byte)
24491 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
24492 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
24496 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
24497 static enum stringop_alg
24498 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
24499 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
24500 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
24502 const struct stringop_algs * algs;
24503 bool optimize_for_speed;
24505 const struct processor_costs *cost;
24507 bool any_alg_usable_p = false;
24510 *dynamic_check = -1;
24512 /* Even if the string operation call is cold, we still might spend a lot
24513 of time processing large blocks. */
24514 if (optimize_function_for_size_p (cfun)
24515 || (optimize_insn_for_size_p ()
24517 || (expected_size != -1 && expected_size < 256))))
24518 optimize_for_speed = false;
24520 optimize_for_speed = true;
24522 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24524 algs = &cost->memset[TARGET_64BIT != 0];
24526 algs = &cost->memcpy[TARGET_64BIT != 0];
24528 /* See maximal size for user defined algorithm. */
24529 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24531 enum stringop_alg candidate = algs->size[i].alg;
24532 bool usable = alg_usable_p (candidate, memset);
24533 any_alg_usable_p |= usable;
24535 if (candidate != libcall && candidate && usable)
24536 max = algs->size[i].max;
24539 /* If expected size is not known but max size is small enough
24540 so inline version is a win, set expected size into that range.  */
24542 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24543 && expected_size == -1)
24544 expected_size = min_size / 2 + max_size / 2;
24546 /* If user specified the algorithm, honor it if possible. */
24547 if (ix86_stringop_alg != no_stringop
24548 && alg_usable_p (ix86_stringop_alg, memset))
24549 return ix86_stringop_alg;
24550 /* rep; movq or rep; movl is the smallest variant. */
24551 else if (!optimize_for_speed)
24554 if (!count || (count & 3) || (memset && !zero_memset))
24555 return alg_usable_p (rep_prefix_1_byte, memset)
24556 ? rep_prefix_1_byte : loop_1_byte;
24558 return alg_usable_p (rep_prefix_4_byte, memset)
24559 ? rep_prefix_4_byte : loop;
24561 /* Very tiny blocks are best handled via the loop, REP is expensive to
   set up.  */
24563 else if (expected_size != -1 && expected_size < 4)
24564 return loop_1_byte;
24565 else if (expected_size != -1)
24567 enum stringop_alg alg = libcall;
24568 bool alg_noalign = false;
24569 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24571 /* We get here if the algorithms that were not libcall-based
24572 were rep-prefix based and we are unable to use rep prefixes
24573 based on global register usage. Break out of the loop and
24574 use the heuristic below. */
24575 if (algs->size[i].max == 0)
24577 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24579 enum stringop_alg candidate = algs->size[i].alg;
24581 if (candidate != libcall && alg_usable_p (candidate, memset))
24584 alg_noalign = algs->size[i].noalign;
24586 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24587 last non-libcall inline algorithm. */
24588 if (TARGET_INLINE_ALL_STRINGOPS)
24590 /* When the current size is best to be copied by a libcall,
24591 but we are still forced to inline, run the heuristic below
24592 that will pick code for medium sized blocks. */
24593 if (alg != libcall)
24595 *noalign = alg_noalign;
24598 else if (!any_alg_usable_p)
24601 else if (alg_usable_p (candidate, memset))
24603 *noalign = algs->size[i].noalign;
24609 /* When asked to inline the call anyway, try to pick meaningful choice.
24610 We look for maximal size of block that is faster to copy by hand and
24611 take blocks of at most of that size guessing that average size will
24612 be roughly half of the block.
24614 If this turns out to be bad, we might simply specify the preferred
24615 choice in ix86_costs. */
24616 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24617 && (algs->unknown_size == libcall
24618 || !alg_usable_p (algs->unknown_size, memset)))
24620 enum stringop_alg alg;
24622 /* If there aren't any usable algorithms, then recursing on
24623 smaller sizes isn't going to find anything. Just return the
24624 simple byte-at-a-time copy loop. */
24625 if (!any_alg_usable_p)
24627 /* Pick something reasonable. */
24628 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24629 *dynamic_check = 128;
24630 return loop_1_byte;
24634 /* Recurse with EXPECTED_SIZE = MAX/2 (the average of an inlinable block),
    then emit a runtime dispatch to a libcall for blocks above MAX.  */
24634 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24635 zero_memset, dynamic_check, noalign);
24636 gcc_assert (*dynamic_check == -1);
24637 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24638 *dynamic_check = max;
24640 gcc_assert (alg != libcall);
24643 return (alg_usable_p (algs->unknown_size, memset)
24644 ? algs->unknown_size : libcall);
24647 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24648 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
24650 decide_alignment (int align,
24651 enum stringop_alg alg,
24653 machine_mode move_mode)
24655 int desired_align = 0;
24657 gcc_assert (alg != no_stringop);
24659 if (alg == libcall)
24661 if (move_mode == VOIDmode)
24664 /* By default align to the size of one move.  */
24664 desired_align = GET_MODE_SIZE (move_mode);
24665 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
24666 copying whole cacheline at once. */
24667 if (TARGET_PENTIUMPRO
24668 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24673 if (desired_align < align)
24674 desired_align = align;
24675 /* Very small expected blocks: skip the alignment prologue and keep
    the incoming ALIGN.  */
24675 if (expected_size != -1 && expected_size < 4)
24676 desired_align = align;
24678 return desired_align;
24682 /* Helper function for memset. For QImode value 0xXY produce
24683 0xXYXYXYXY of width specified by MODE. This is essentially
24684 a * 0x10101010, but we can do slightly better than
24685 synth_mult by unwinding the sequence by hand on CPUs with
24688 promote_duplicated_reg (machine_mode mode, rtx val)
24690 machine_mode valmode = GET_MODE (val);
24692 int nops = mode == DImode ? 3 : 2;
24694 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24695 /* Zero duplicates to zero: just load the zero constant.  */
24695 if (val == const0_rtx)
24696 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24697 /* Constant byte: build the replicated constant at compile time.  */
24697 if (CONST_INT_P (val))
24699 HOST_WIDE_INT v = INTVAL (val) & 255;
24703 if (mode == DImode)
24704 v |= (v << 16) << 16;
24705 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24708 if (valmode == VOIDmode)
24710 if (valmode != QImode)
24711 val = gen_lowpart (QImode, val);
24712 if (mode == QImode)
24714 if (!TARGET_PARTIAL_REG_STALL)
24716 /* If a multiply is cheaper than the shift/or sequence on this CPU,
    duplicate the byte by multiplying with the replicated-one pattern.  */
24716 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24717 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24718 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24719 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24721 rtx reg = convert_modes (mode, QImode, val, true);
24722 tmp = promote_duplicated_reg (mode, const1_rtx);
24723 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24728 rtx reg = convert_modes (mode, QImode, val, true);
24730 if (!TARGET_PARTIAL_REG_STALL)
24731 if (mode == SImode)
24732 emit_insn (gen_movsi_insv_1 (reg, reg));
24734 emit_insn (gen_movdi_insv_1 (reg, reg));
24737 /* Fallback: widen the byte with shift/or steps (v |= v<<8, v |= v<<16,
    and for DImode also v |= v<<32).  */
24737 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24738 NULL, 1, OPTAB_DIRECT);
24740 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24742 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24743 NULL, 1, OPTAB_DIRECT);
24744 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24745 if (mode == SImode)
24747 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24748 NULL, 1, OPTAB_DIRECT);
24749 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24754 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
24755 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
24756 alignment from ALIGN to DESIRED_ALIGN. */
24758 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24764 /* Pick the widest mode actually needed: DImode when chunks or alignment
    exceed 4 bytes, else SImode, else HImode; otherwise keep VAL as-is.  */
24764 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24765 promoted_val = promote_duplicated_reg (DImode, val);
24766 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24767 promoted_val = promote_duplicated_reg (SImode, val);
24768 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24769 promoted_val = promote_duplicated_reg (HImode, val);
24771 promoted_val = val;
24773 return promoted_val;
24776 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24777 operations when profitable. The code depends upon architecture, block size
24778 and alignment, but always has one of the following overall structures:
24780 Aligned move sequence:
24782 1) Prologue guard: Conditional that jumps up to epilogues for small
24783 blocks that can be handled by epilogue alone. This is faster
24784 but also needed for correctness, since prologue assume the block
24785 is larger than the desired alignment.
24787 Optional dynamic check for size and libcall for large
24788 blocks is emitted here too, with -minline-stringops-dynamically.
24790 2) Prologue: copy first few bytes in order to get destination
24791 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24792 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24793 copied. We emit either a jump tree on power of two sized
24794 blocks, or a byte loop.
24796 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24797 with specified algorithm.
24799 4) Epilogue: code copying tail of the block that is too small to be
24800 handled by main body (or up to size guarded by prologue guard).
24802 Misaligned move sequence
24804 1) misaligned move prologue/epilogue containing:
24805 a) Prologue handling small memory blocks and jumping to done_label
24806 (skipped if blocks are known to be large enough)
24807 b) Single move copying first DESIRED_ALIGN-ALIGN bytes if alignment is
24808 needed by single possibly misaligned move
24809 (skipped if alignment is not needed)
24810 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24812 2) Zero size guard dispatching to done_label, if needed
24814 3) dispatch to library call, if needed,
24816 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24817 with specified algorithm. */
24819 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24820 rtx align_exp, rtx expected_align_exp,
24821 rtx expected_size_exp, rtx min_size_exp,
24822 rtx max_size_exp, rtx probable_max_size_exp,
24827 rtx_code_label *label = NULL;
24829 rtx_code_label *jump_around_label = NULL;
24830 HOST_WIDE_INT align = 1;
24831 unsigned HOST_WIDE_INT count = 0;
24832 HOST_WIDE_INT expected_size = -1;
24833 int size_needed = 0, epilogue_size_needed;
24834 int desired_align = 0, align_bytes = 0;
24835 enum stringop_alg alg;
24836 rtx promoted_val = NULL;
24837 rtx vec_promoted_val = NULL;
24838 bool force_loopy_epilogue = false;
24840 bool need_zero_guard = false;
24842 machine_mode move_mode = VOIDmode;
24843 int unroll_factor = 1;
24844 /* TODO: Once value ranges are available, fill in proper data. */
24845 unsigned HOST_WIDE_INT min_size = 0;
24846 unsigned HOST_WIDE_INT max_size = -1;
24847 unsigned HOST_WIDE_INT probable_max_size = -1;
24848 bool misaligned_prologue_used = false;
24850 if (CONST_INT_P (align_exp))
24851 align = INTVAL (align_exp);
24852 /* i386 can do misaligned access on reasonably increased cost. */
24853 if (CONST_INT_P (expected_align_exp)
24854 && INTVAL (expected_align_exp) > align)
24855 align = INTVAL (expected_align_exp);
24856 /* ALIGN is the minimum of destination and source alignment, but we care here
24857 just about destination alignment. */
24859 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24860 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24862 if (CONST_INT_P (count_exp))
24864 min_size = max_size = probable_max_size = count = expected_size
24865 = INTVAL (count_exp);
24866 /* When COUNT is 0, there is nothing to do. */
24873 min_size = INTVAL (min_size_exp);
24875 max_size = INTVAL (max_size_exp);
24876 if (probable_max_size_exp)
24877 probable_max_size = INTVAL (probable_max_size_exp);
24878 if (CONST_INT_P (expected_size_exp))
24879 expected_size = INTVAL (expected_size_exp);
24882 /* Make sure we don't need to care about overflow later on. */
24883 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24886 /* Step 0: Decide on preferred algorithm, desired alignment and
24887 size of chunks to be copied by main loop. */
24888 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24890 issetmem && val_exp == const0_rtx,
24891 &dynamic_check, &noalign);
24892 if (alg == libcall)
24894 gcc_assert (alg != no_stringop);
24896 /* For now vector-version of memset is generated only for memory zeroing, as
24897 creating of promoted vector value is very cheap in this case. */
24898 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24899 alg = unrolled_loop;
24902 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24903 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24905 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24908 move_mode = word_mode;
24914 gcc_unreachable ();
24916 need_zero_guard = true;
24917 move_mode = QImode;
24920 need_zero_guard = true;
24922 case unrolled_loop:
24923 need_zero_guard = true;
24924 unroll_factor = (TARGET_64BIT ? 4 : 2);
24927 need_zero_guard = true;
24929 /* Find the widest supported mode. */
24930 move_mode = word_mode;
24931 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24932 != CODE_FOR_nothing)
24933 move_mode = GET_MODE_WIDER_MODE (move_mode);
24935 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24936 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24937 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24939 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24940 move_mode = mode_for_vector (word_mode, nunits);
24941 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24942 move_mode = word_mode;
24944 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24946 case rep_prefix_8_byte:
24947 move_mode = DImode;
24949 case rep_prefix_4_byte:
24950 move_mode = SImode;
24952 case rep_prefix_1_byte:
24953 move_mode = QImode;
24956 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24957 epilogue_size_needed = size_needed;
24959 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24960 if (!TARGET_ALIGN_STRINGOPS || noalign)
24961 align = desired_align;
24963 /* Step 1: Prologue guard. */
24965 /* Alignment code needs count to be in register. */
24966 if (CONST_INT_P (count_exp) && desired_align > align)
24968 if (INTVAL (count_exp) > desired_align
24969 && INTVAL (count_exp) > size_needed)
24972 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24973 if (align_bytes <= 0)
24976 align_bytes = desired_align - align_bytes;
24978 if (align_bytes == 0)
24979 count_exp = force_reg (counter_mode (count_exp), count_exp);
24981 gcc_assert (desired_align >= 1 && align >= 1);
24983 /* Misaligned move sequences handle both prologue and epilogue at once.
24984 Default code generation results in a smaller code for large alignments
24985 and also avoids redundant job when sizes are known precisely. */
24986 misaligned_prologue_used
24987 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24988 && MAX (desired_align, epilogue_size_needed) <= 32
24989 && desired_align <= epilogue_size_needed
24990 && ((desired_align > align && !align_bytes)
24991 || (!count && epilogue_size_needed > 1)));
24993 /* Do the cheap promotion to allow better CSE across the
24994 main loop and epilogue (ie one load of the big constant in the
24996 For now the misaligned move sequences do not have fast path
24997 without broadcasting. */
24998 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
25000 if (alg == vector_loop)
25002 gcc_assert (val_exp == const0_rtx);
25003 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
25004 promoted_val = promote_duplicated_reg_to_size (val_exp,
25005 GET_MODE_SIZE (word_mode),
25006 desired_align, align);
25010 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25011 desired_align, align);
25014 /* Misaligned move sequences handle both prologues and epilogues at once.
25015 Default code generation results in smaller code for large alignments and
25016 also avoids redundant job when sizes are known precisely. */
25017 if (misaligned_prologue_used)
25019 /* Misaligned move prologue handles small blocks by itself. */
25020 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
25021 (dst, src, &destreg, &srcreg,
25022 move_mode, promoted_val, vec_promoted_val,
25024 &jump_around_label,
25025 desired_align < align
25026 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
25027 desired_align, align, &min_size, dynamic_check, issetmem);
25029 src = change_address (src, BLKmode, srcreg);
25030 dst = change_address (dst, BLKmode, destreg);
25031 set_mem_align (dst, desired_align * BITS_PER_UNIT);
25032 epilogue_size_needed = 0;
25033 if (need_zero_guard && !min_size)
25035 /* It is possible that we copied enough so the main loop will not
   execute.  */
25037 gcc_assert (size_needed > 1);
25038 if (jump_around_label == NULL_RTX)
25039 jump_around_label = gen_label_rtx ();
25040 emit_cmp_and_jump_insns (count_exp,
25041 GEN_INT (size_needed),
25042 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
25043 if (expected_size == -1
25044 || expected_size < (desired_align - align) / 2 + size_needed)
25045 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25047 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25050 /* Ensure that alignment prologue won't copy past end of block. */
25051 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
25053 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
25054 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
25055 Make sure it is power of 2. */
25056 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
25058 /* To improve performance of small blocks, we jump around the VAL
25059 promoting mode. This means that if the promoted VAL is not constant,
25060 we might not use it in the epilogue and have to use byte loop variant.  */
25062 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
25063 force_loopy_epilogue = true;
25064 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25065 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25067 /* If main algorithm works on QImode, no epilogue is needed.
25068 For small sizes just don't align anything. */
25069 if (size_needed == 1)
25070 desired_align = align;
25075 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25077 label = gen_label_rtx ();
25078 emit_cmp_and_jump_insns (count_exp,
25079 GEN_INT (epilogue_size_needed),
25080 LTU, 0, counter_mode (count_exp), 1, label);
25081 if (expected_size == -1 || expected_size < epilogue_size_needed)
25082 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25084 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25088 /* Emit code to decide on runtime whether library call or inline should be
   used.  */
25090 if (dynamic_check != -1)
25092 if (!issetmem && CONST_INT_P (count_exp))
25094 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
25096 emit_block_move_via_libcall (dst, src, count_exp, false);
25097 count_exp = const0_rtx;
25103 rtx_code_label *hot_label = gen_label_rtx ();
25104 if (jump_around_label == NULL_RTX)
25105 jump_around_label = gen_label_rtx ();
25106 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
25107 LEU, 0, counter_mode (count_exp),
25109 predict_jump (REG_BR_PROB_BASE * 90 / 100);
25111 set_storage_via_libcall (dst, count_exp, val_exp, false);
25113 emit_block_move_via_libcall (dst, src, count_exp, false);
25114 emit_jump (jump_around_label);
25115 emit_label (hot_label);
25119 /* Step 2: Alignment prologue. */
25120 /* Do the expensive promotion once we branched off the small blocks. */
25121 if (issetmem && !promoted_val)
25122 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25123 desired_align, align);
25125 if (desired_align > align && !misaligned_prologue_used)
25127 if (align_bytes == 0)
25129 /* Except for the first move in prologue, we no longer know
25130 constant offset in aliasing info. It doesn't seem worth
25131 the pain to maintain it for the first move, so throw away
   the info early.  */
25133 dst = change_address (dst, BLKmode, destreg);
25135 src = change_address (src, BLKmode, srcreg);
25136 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
25137 promoted_val, vec_promoted_val,
25138 count_exp, align, desired_align,
25140 /* At most desired_align - align bytes are copied. */
25141 if (min_size < (unsigned)(desired_align - align))
25144 min_size -= desired_align - align;
25148 /* If we know how many bytes need to be stored before dst is
25149 sufficiently aligned, maintain aliasing info accurately. */
25150 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
25158 count_exp = plus_constant (counter_mode (count_exp),
25159 count_exp, -align_bytes);
25160 count -= align_bytes;
25161 min_size -= align_bytes;
25162 max_size -= align_bytes;
25164 if (need_zero_guard
25166 && (count < (unsigned HOST_WIDE_INT) size_needed
25167 || (align_bytes == 0
25168 && count < ((unsigned HOST_WIDE_INT) size_needed
25169 + desired_align - align))))
25171 /* It is possible that we copied enough so the main loop will not
   execute.  */
25173 gcc_assert (size_needed > 1);
25174 if (label == NULL_RTX)
25175 label = gen_label_rtx ();
25176 emit_cmp_and_jump_insns (count_exp,
25177 GEN_INT (size_needed),
25178 LTU, 0, counter_mode (count_exp), 1, label);
25179 if (expected_size == -1
25180 || expected_size < (desired_align - align) / 2 + size_needed)
25181 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25183 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25186 if (label && size_needed == 1)
25188 emit_label (label);
25189 LABEL_NUSES (label) = 1;
25191 epilogue_size_needed = 1;
25193 promoted_val = val_exp;
25195 else if (label == NULL_RTX && !misaligned_prologue_used)
25196 epilogue_size_needed = size_needed;
25198 /* Step 3: Main loop. */
25205 gcc_unreachable ();
25208 case unrolled_loop:
25209 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
25210 count_exp, move_mode, unroll_factor,
25211 expected_size, issetmem);
25214 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
25215 vec_promoted_val, count_exp, move_mode,
25216 unroll_factor, expected_size, issetmem);
25218 case rep_prefix_8_byte:
25219 case rep_prefix_4_byte:
25220 case rep_prefix_1_byte:
25221 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
25222 val_exp, count_exp, move_mode, issetmem);
25225 /* Adjust properly the offset of src and dest memory for aliasing. */
25226 if (CONST_INT_P (count_exp))
25229 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
25230 (count / size_needed) * size_needed);
25231 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
25232 (count / size_needed) * size_needed);
25237 src = change_address (src, BLKmode, srcreg);
25238 dst = change_address (dst, BLKmode, destreg);
25241 /* Step 4: Epilogue to copy the remaining bytes. */
25245 /* When the main loop is done, COUNT_EXP might hold original count,
25246 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
25247 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
25248 bytes. Compensate if needed. */
25250 if (size_needed < epilogue_size_needed)
25253 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
25254 GEN_INT (size_needed - 1), count_exp, 1,
25256 if (tmp != count_exp)
25257 emit_move_insn (count_exp, tmp);
25259 emit_label (label);
25260 LABEL_NUSES (label) = 1;
25263 if (count_exp != const0_rtx && epilogue_size_needed > 1)
25265 if (force_loopy_epilogue)
25266 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
25267 epilogue_size_needed);
25271 expand_setmem_epilogue (dst, destreg, promoted_val,
25272 vec_promoted_val, count_exp,
25273 epilogue_size_needed);
25275 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
25276 epilogue_size_needed);
25279 if (jump_around_label)
25280 emit_label (jump_around_label);
25285 /* Expand the appropriate insns for doing strlen if not just doing
25288 out = result, initialized with the start address
25289 align_rtx = alignment of the address.
25290 scratch = scratch register, initialized with the startaddress when
25291 not aligned, otherwise undefined
25293 This is just the body. It needs the initializations mentioned above and
25294 some address computing at the end. These things are done in i386.md. */
/* Emit the body of an unrolled strlen: byte-check 1..3 leading bytes until
   OUT is 4-byte aligned, then scan a word at a time using the classic
   "(x - 0x01010101) & ~x & 0x80808080" zero-byte trick, and finally adjust
   OUT back to the exact NUL position without a branch.
   NOTE(review): this chunk is a sampled excerpt; interior lines of the
   original function are missing, so only the visible lines are kept.  */
25297 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
25301 rtx_code_label *align_2_label = NULL;
25302 rtx_code_label *align_3_label = NULL;
25303 rtx_code_label *align_4_label = gen_label_rtx ();
25304 rtx_code_label *end_0_label = gen_label_rtx ();
25306 rtx tmpreg = gen_reg_rtx (SImode);
25307 rtx scratch = gen_reg_rtx (SImode);
25311 if (CONST_INT_P (align_rtx))
25312 align = INTVAL (align_rtx);
25314 /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
25316 /* Is there a known alignment and is it less than 4?  */
25319 rtx scratch1 = gen_reg_rtx (Pmode);
25320 emit_move_insn (scratch1, out);
25321 /* Is there a known alignment and is it not 2? */
25324 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
25325 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
25327 /* Leave just the 3 lower bits.  */
25328 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
25329 NULL_RTX, 0, OPTAB_WIDEN);
25331 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25332 Pmode, 1, align_4_label);
25333 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
25334 Pmode, 1, align_2_label);
25335 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
25336 Pmode, 1, align_3_label);
25340 /* Since the alignment is 2, we have to check 2 or 0 bytes;
25341 check if is aligned to 4 - byte.  */
25343 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
25344 NULL_RTX, 0, OPTAB_WIDEN);
25346 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25347 Pmode, 1, align_4_label);
25350 mem = change_address (src, QImode, out);
25352 /* Now compare the bytes.  */
25354 /* Compare the first n unaligned byte on a byte per byte basis.  */
25355 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
25356 QImode, 1, end_0_label);
25358 /* Increment the address.  */
25359 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25361 /* Not needed with an alignment of 2 */
25364 emit_label (align_2_label);
25366 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25369 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25371 emit_label (align_3_label);
25374 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25377 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25380 /* Generate loop to check 4 bytes at a time.  It is not a good idea to
25381 align this loop.  It gives only huge programs, but does not help to
25383 emit_label (align_4_label);
25385 mem = change_address (src, SImode, out);
25386 emit_move_insn (scratch, mem);
25387 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
25389 /* This formula yields a nonzero result iff one of the bytes is zero.
25390 This saves three branches inside loop and many cycles.  */
25392 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
25393 emit_insn (gen_one_cmplsi2 (scratch, scratch));
25394 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
25395 emit_insn (gen_andsi3 (tmpreg, tmpreg,
25396 gen_int_mode (0x80808080, SImode)));
25397 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* Branch-free variant: use cmoves on the 0x8080 test to step OUT forward
   by two when the zero byte is not in the low half-word.  */
25402 rtx reg = gen_reg_rtx (SImode);
25403 rtx reg2 = gen_reg_rtx (Pmode);
25404 emit_move_insn (reg, tmpreg);
25405 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
25407 /* If zero is not in the first two bytes, move two bytes forward.  */
25408 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25409 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25410 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25411 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
25412 gen_rtx_IF_THEN_ELSE (SImode, tmp,
25415 /* Emit lea manually to avoid clobbering of flags.  */
25416 emit_insn (gen_rtx_SET (SImode, reg2,
25417 gen_rtx_PLUS (Pmode, out, const2_rtx)));
25419 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25420 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25421 emit_insn (gen_rtx_SET (VOIDmode, out,
25422 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
/* Branching variant: jump past the 2-byte adjustment when the zero byte
   is already in the first half-word.  */
25428 rtx_code_label *end_2_label = gen_label_rtx ();
25429 /* Is zero in the first two bytes? */
25431 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25432 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25433 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
25434 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25435 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
25437 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
25438 JUMP_LABEL (tmp) = end_2_label;
25440 /* Not in the first two.  Move two bytes forward.  */
25441 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
25442 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
25444 emit_label (end_2_label);
25448 /* Avoid branch in fixing the byte.  */
25449 tmpreg = gen_lowpart (QImode, tmpreg);
25450 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
25451 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
25452 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
25453 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
25455 emit_label (end_0_label);
25458 /* Expand strlen. */
/* Expand strlen (OUT = strlen of string at SRC, terminated by EOSCHAR,
   with known alignment ALIGN).  Chooses between the unrolled SImode scan
   (ix86_expand_strlensi_unroll_1) and a repne-scasb sequence.
   NOTE(review): sampled excerpt; interior lines are missing.  */
25461 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
25463 rtx addr, scratch1, scratch2, scratch3, scratch4;
25465 /* The generic case of strlen expander is long.  Avoid its
25466 expanding unless TARGET_INLINE_ALL_STRINGOPS.  */
25468 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25469 && !TARGET_INLINE_ALL_STRINGOPS
25470 && !optimize_insn_for_size_p ()
25471 && (!CONST_INT_P (align) || INTVAL (align) < 4))
25474 addr = force_reg (Pmode, XEXP (src, 0));
25475 scratch1 = gen_reg_rtx (Pmode);
25477 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25478 && !optimize_insn_for_size_p ())
25480 /* Well it seems that some optimizer does not combine a call like
25481 foo(strlen(bar), strlen(bar));
25482 when the move and the subtraction is done here.  It does calculate
25483 the length just once when these instructions are done inside of
25484 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
25485 often used and I use one fewer register for the lifetime of
25486 output_strlen_unroll() this is better.  */
25488 emit_move_insn (out, addr);
25490 ix86_expand_strlensi_unroll_1 (out, src, align);
25492 /* strlensi_unroll_1 returns the address of the zero at the end of
25493 the string, like memchr(), so compute the length by subtracting
25494 the start address.  */
25495 emit_insn (ix86_gen_sub3 (out, out, addr));
25501 /* Can't use this if the user has appropriated eax, ecx, or edi.  */
25502 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
/* repne scasb path: scratch4 holds -1 (max count), scratch3 the address.  */
25505 scratch2 = gen_reg_rtx (Pmode);
25506 scratch3 = gen_reg_rtx (Pmode);
25507 scratch4 = force_reg (Pmode, constm1_rtx);
25509 emit_move_insn (scratch3, addr);
25510 eoschar = force_reg (QImode, eoschar);
25512 src = replace_equiv_address_nv (src, scratch3);
25514 /* If .md starts supporting :P, this can be done in .md.  */
25515 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25516 scratch4), UNSPEC_SCAS);
25517 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
25518 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25519 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
25524 /* For given symbol (function) construct code to compute address of it's PLT
25525 entry in large x86-64 PIC model. */
/* Materialize the PLT entry address of SYMBOL for the large x86-64 PIC
   model: tmp = PLTOFF(symbol) + PIC register.  Returns the pseudo (the
   final return is outside this sampled excerpt).  */
25527 construct_plt_address (rtx symbol)
25531 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25532 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25533 gcc_assert (Pmode == DImode);
25535 tmp = gen_reg_rtx (Pmode);
25536 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25538 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25539 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
/* Emit a call (or sibcall) to FNADDR with argument bookkeeping: RETVAL is
   the value destination (NULL for void), CALLARG1/CALLARG2 carry ABI info
   (CALLARG2 is the SSE-register count for the 64-bit varargs AL protocol),
   POP the callee-pop amount.  Builds the CALL pattern, wraps it in a
   PARALLEL with the stack-pop and MS-ABI clobbers as needed.
   NOTE(review): sampled excerpt; interior lines are missing.  */
25544 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25546 rtx pop, bool sibcall)
25549 rtx use = NULL, call;
25550 unsigned int vec_len = 0;
25552 if (pop == const0_rtx)
25554 gcc_assert (!TARGET_64BIT || !pop);
25556 if (TARGET_MACHO && !TARGET_64BIT)
25559 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25560 fnaddr = machopic_indirect_call_target (fnaddr);
25565 /* Static functions and indirect calls don't need the pic register.  */
25568 || (ix86_cmodel == CM_LARGE_PIC
25569 && DEFAULT_ABI != MS_ABI))
25570 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25571 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
25573 use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM))
25574 if (ix86_use_pseudo_pic_reg ())
25575 emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
25576 pic_offset_table_rtx);
25580 /* Skip setting up RAX register for -mskip-rax-setup when there are no
25581 parameters passed in vector registers.  */
25583 && (INTVAL (callarg2) > 0
25584 || (INTVAL (callarg2) == 0
25585 && (TARGET_SSE || !flag_skip_rax_setup))))
25587 rtx al = gen_rtx_REG (QImode, AX_REG);
25588 emit_move_insn (al, callarg2);
25589 use_reg (&use, al);
25592 if (ix86_cmodel == CM_LARGE_PIC
25595 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25596 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25597 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25599 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25600 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25602 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25603 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25606 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25610 /* We should add bounds as destination register in case
25611 pointer with bounds may be returned.  */
25612 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
25614 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
25615 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
25616 retval = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, retval, b0, b1));
25617 chkp_put_regs_to_expr_list (retval);
25620 call = gen_rtx_SET (VOIDmode, retval, call);
25622 vec[vec_len++] = call;
25626 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25627 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
25628 vec[vec_len++] = pop;
/* MS ABI: extra registers are clobbered by the callee unless CALLARG2 is
   the magic -2 (unprototyped sysv call) marker.  */
25631 if (TARGET_64BIT_MS_ABI
25632 && (!callarg2 || INTVAL (callarg2) != -2))
25634 int const cregs_size
25635 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25638 for (i = 0; i < cregs_size; i++)
25640 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25641 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25643 clobber_reg (&use, gen_rtx_REG (mode, regno));
25648 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25649 call = emit_call_insn (call);
25651 CALL_INSN_FUNCTION_USAGE (call) = use;
25656 /* Output the assembly for a call instruction. */
/* Output the assembly template for call insn INSN whose target operand is
   CALL_OP.  Chooses jmp vs call text, and for SEH targets decides whether
   a trailing nop is required so the unwinder does not scan past the call
   into the next function.  NOTE(review): sampled excerpt; interior lines
   are missing.  */
25659 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25661 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25662 bool seh_nop_p = false;
25665 if (SIBLING_CALL_P (insn))
25668 xasm = "%!jmp\t%P0";
25669 /* SEH epilogue detection requires the indirect branch case
25670 to include REX.W.  */
25671 else if (TARGET_SEH)
25672 xasm = "%!rex.W jmp %A0";
25674 xasm = "%!jmp\t%A0";
25676 output_asm_insn (xasm, &call_op);
25680 /* SEH unwinding can require an extra nop to be emitted in several
25681 circumstances.  Determine if we have one of those.  */
25686 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25688 /* If we get to another real insn, we don't need the nop.  */
25692 /* If we get to the epilogue note, prevent a catch region from
25693 being adjacent to the standard epilogue sequence.  If non-
25694 call-exceptions, we'll have done this during epilogue emission.  */
25695 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25696 && !flag_non_call_exceptions
25697 && !can_throw_internal (insn))
25704 /* If we didn't find a real insn following the call, prevent the
25705 unwinder from looking into the next function.  */
25711 xasm = "%!call\t%P0";
25713 xasm = "%!call\t%A0";
25715 output_asm_insn (xasm, &call_op);
25723 /* Clear stack slot assignments remembered from previous functions.
25724 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocate and zero-initialize the per-function machine_function record;
   installed via init_machine_status.  The -1 marks the fast prologue
   register count as "not yet computed".  */
25727 static struct machine_function *
25728 ix86_init_machine_status (void)
25730 struct machine_function *f;
25732 f = ggc_cleared_alloc<machine_function> ();
25733 f->use_fast_prologue_epilogue_nregs = -1;
25734 f->call_abi = ix86_abi;
25739 /* Return a MEM corresponding to a stack slot with mode MODE.
25740 Allocate a new slot if necessary.
25742 The RTL for a function can have several slots available: N is
25743 which slot to use. */
/* Return a MEM for stack slot N of mode MODE, reusing a previously
   allocated slot from ix86_stack_locals when one with the same (mode, n)
   key exists, otherwise allocating a fresh one and recording it.  */
25746 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
25748 struct stack_local_entry *s;
25750 gcc_assert (n < MAX_386_STACK_LOCALS);
25752 for (s = ix86_stack_locals; s; s = s->next)
25753 if (s->mode == mode && s->n == n)
25754 return validize_mem (copy_rtx (s->rtl));
25756 s = ggc_alloc<stack_local_entry> ();
25759 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25761 s->next = ix86_stack_locals;
25762 ix86_stack_locals = s;
25763 return validize_mem (copy_rtx (s->rtl));
/* INSTANTIATE_DECLS hook: replace virtual registers in every remembered
   stack-local slot RTL with real frame addresses.  */
25767 ix86_instantiate_decls (void)
25769 struct stack_local_entry *s;
25771 for (s = ix86_stack_locals; s; s = s->next)
25772 if (s->rtl != NULL_RTX)
25773 instantiate_decl_rtl (s->rtl);
25776 /* Check whether x86 address PARTS is a pc-relative address. */
/* Return whether decomposed address PARTS is pc-relative: a displacement
   with no base and no index, whose symbol (after stripping CONST and a
   constant addend) is a label, a non-TLS symbol, or a GOTPCREL/PCREL/
   GOTNTPOFF unspec.  NOTE(review): sampled excerpt; interior lines
   (including the early-exit returns) are missing.  */
25779 rip_relative_addr_p (struct ix86_address *parts)
25781 rtx base, index, disp;
25783 base = parts->base;
25784 index = parts->index;
25785 disp = parts->disp;
25787 if (disp && !base && !index)
25793 if (GET_CODE (disp) == CONST)
25794 symbol = XEXP (disp, 0);
25795 if (GET_CODE (symbol) == PLUS
25796 && CONST_INT_P (XEXP (symbol, 1)))
25797 symbol = XEXP (symbol, 0);
25799 if (GET_CODE (symbol) == LABEL_REF
25800 || (GET_CODE (symbol) == SYMBOL_REF
25801 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25802 || (GET_CODE (symbol) == UNSPEC
25803 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25804 || XINT (symbol, 1) == UNSPEC_PCREL
25805 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25812 /* Calculate the length of the memory address in the instruction encoding.
25813 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25814 or other prefixes. We never generate addr32 prefix for LEA insn. */
/* Compute the encoded length in bytes of memory address ADDR (segment
   override + addr32 prefix + SIB + displacement; modrm/opcode excluded).
   LEA indicates the address feeds an LEA insn, which never takes the
   addr32 prefix.  NOTE(review): sampled excerpt; interior lines are
   missing.  */
25817 memory_address_length (rtx addr, bool lea)
25819 struct ix86_address parts;
25820 rtx base, index, disp;
/* Auto-modified addresses (push/pop style) have a fixed, simple form.  */
25824 if (GET_CODE (addr) == PRE_DEC
25825 || GET_CODE (addr) == POST_INC
25826 || GET_CODE (addr) == PRE_MODIFY
25827 || GET_CODE (addr) == POST_MODIFY)
25830 ok = ix86_decompose_address (addr, &parts);
25833 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25835 /* If this is not LEA instruction, add the length of addr32 prefix.  */
25836 if (TARGET_64BIT && !lea
25837 && (SImode_address_operand (addr, VOIDmode)
25838 || (parts.base && GET_MODE (parts.base) == SImode)
25839 || (parts.index && GET_MODE (parts.index) == SImode)))
25843 index = parts.index;
25846 if (base && GET_CODE (base) == SUBREG)
25847 base = SUBREG_REG (base);
25848 if (index && GET_CODE (index) == SUBREG)
25849 index = SUBREG_REG (index);
25851 gcc_assert (base == NULL_RTX || REG_P (base));
25852 gcc_assert (index == NULL_RTX || REG_P (index));
25855 - esp as the base always wants an index,
25856 - ebp as the base always wants a displacement,
25857 - r12 as the base always wants an index,
25858 - r13 as the base always wants a displacement.  */
25860 /* Register Indirect.  */
25861 if (base && !index && !disp)
25863 /* esp (for its index) and ebp (for its displacement) need
25864 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
25866 if (base == arg_pointer_rtx
25867 || base == frame_pointer_rtx
25868 || REGNO (base) == SP_REG
25869 || REGNO (base) == BP_REG
25870 || REGNO (base) == R12_REG
25871 || REGNO (base) == R13_REG)
25875 /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
25876 is not disp32, but disp32(%rip), so for disp32
25877 SIB byte is needed, unless print_operand_address
25878 optimizes it into disp32(%rip) or (%rip) is implied
25880 else if (disp && !base && !index)
25883 if (rip_relative_addr_p (&parts))
25888 /* Find the length of the displacement constant.  */
/* constraint K = signed 8-bit immediate, i.e. a disp8 encoding.  */
25891 if (base && satisfies_constraint_K (disp))
25896 /* ebp always wants a displacement.  Similarly r13.  */
25897 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25900 /* An index requires the two-byte modrm form....  */
25902 /* ...like esp (or r12), which always wants an index.  */
25903 || base == arg_pointer_rtx
25904 || base == frame_pointer_rtx
25905 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25912 /* Compute default value for "length_immediate" attribute. When SHORTFORM
25913 is set, expect that insn have 8bit immediate alternative. */
/* Compute the default "length_immediate" attribute for INSN: the encoded
   size of its constant operand.  SHORTFORM means the insn has an 8-bit
   sign-extended immediate alternative, so values in [-128, 127] (after
   truncation to the insn's mode) cost one byte.
   NOTE(review): sampled excerpt; interior lines are missing.  */
25915 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
25919 extract_insn_cached (insn);
25920 for (i = recog_data.n_operands - 1; i >= 0; --i)
25921 if (CONSTANT_P (recog_data.operand[i]))
25923 enum attr_mode mode = get_attr_mode (insn);
25926 if (shortform && CONST_INT_P (recog_data.operand[i]))
25928 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25935 ival = trunc_int_for_mode (ival, HImode);
25938 ival = trunc_int_for_mode (ival, SImode);
25943 if (IN_RANGE (ival, -128, 127))
25960 /* Immediates for DImode instructions are encoded
25961 as 32bit sign extended values.  */
25966 fatal_insn ("unknown insn mode", insn);
25972 /* Compute default value for "length_address" attribute. */
/* Compute the default "length_address" attribute for INSN: the memory
   address encoding length of its (first relevant) MEM operand, or of the
   LEA source address.  NOTE(review): sampled excerpt; interior lines are
   missing.  */
25974 ix86_attr_length_address_default (rtx_insn *insn)
25978 if (get_attr_type (insn) == TYPE_LEA)
25980 rtx set = PATTERN (insn), addr;
25982 if (GET_CODE (set) == PARALLEL)
25983 set = XVECEXP (set, 0, 0);
25985 gcc_assert (GET_CODE (set) == SET);
25987 addr = SET_SRC (set);
25989 return memory_address_length (addr, true);
25992 extract_insn_cached (insn);
25993 for (i = recog_data.n_operands - 1; i >= 0; --i)
25994 if (MEM_P (recog_data.operand[i]))
25996 constrain_operands_cached (insn, reload_completed);
25997 if (which_alternative != -1)
/* Walk the constraint string to the chosen alternative so that operands
   ignored ('X') by this alternative don't contribute address length.  */
25999 const char *constraints = recog_data.constraints[i];
26000 int alt = which_alternative;
26002 while (*constraints == '=' || *constraints == '+')
26005 while (*constraints++ != ',')
26007 /* Skip ignored operands.  */
26008 if (*constraints == 'X')
26011 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
26016 /* Compute default value for "length_vex" attribute. It includes
26017 2 or 3 byte VEX prefix and 1 opcode byte. */
/* Compute the default "length_vex" attribute: VEX prefix size (2 or 3
   bytes) plus one opcode byte.  The 2-byte form is only possible for
   0f-map opcodes without VEX.W, and only when no REX.W/REX.X/REX.B bit
   (DImode general reg, or extended reg in a memory operand) is needed.
   NOTE(review): sampled excerpt; interior lines are missing.  */
26020 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
26025 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
26026 byte VEX prefix.  */
26027 if (!has_0f_opcode || has_vex_w)
26030 /* We can always use 2 byte VEX prefix in 32bit.  */
26034 extract_insn_cached (insn);
26036 for (i = recog_data.n_operands - 1; i >= 0; --i)
26037 if (REG_P (recog_data.operand[i]))
26039 /* REX.W bit uses 3 byte VEX prefix.  */
26040 if (GET_MODE (recog_data.operand[i]) == DImode
26041 && GENERAL_REG_P (recog_data.operand[i]))
26046 /* REX.X or REX.B bits use 3 byte VEX prefix.  */
26047 if (MEM_P (recog_data.operand[i])
26048 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
26055 /* Return the maximum number of instructions a cpu can issue. */
/* TARGET_SCHED_ISSUE_RATE hook: the maximum number of instructions the
   tuned-for CPU can issue per cycle, grouped by microarchitecture family.
   NOTE(review): sampled excerpt; the per-group return values fall in the
   missing lines.  */
26058 ix86_issue_rate (void)
26062 case PROCESSOR_PENTIUM:
26063 case PROCESSOR_BONNELL:
26064 case PROCESSOR_SILVERMONT:
26065 case PROCESSOR_KNL:
26066 case PROCESSOR_INTEL:
26068 case PROCESSOR_BTVER2:
26069 case PROCESSOR_PENTIUM4:
26070 case PROCESSOR_NOCONA:
26073 case PROCESSOR_PENTIUMPRO:
26074 case PROCESSOR_ATHLON:
26076 case PROCESSOR_AMDFAM10:
26077 case PROCESSOR_GENERIC:
26078 case PROCESSOR_BTVER1:
26081 case PROCESSOR_BDVER1:
26082 case PROCESSOR_BDVER2:
26083 case PROCESSOR_BDVER3:
26084 case PROCESSOR_BDVER4:
26085 case PROCESSOR_CORE2:
26086 case PROCESSOR_NEHALEM:
26087 case PROCESSOR_SANDYBRIDGE:
26088 case PROCESSOR_HASWELL:
26096 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
26097 by DEP_INSN and nothing set by DEP_INSN. */
/* Return TRUE iff INSN (a flags consumer: setcc/icmov/fcmov/jcc) reads the
   flags set by DEP_INSN and nothing else DEP_INSN sets.  DEP_INSN either
   is a single SET of the flags, or a two-SET PARALLEL where one SET is the
   flags and the other (SET2) another register INSN must not read.
   NOTE(review): sampled excerpt; interior lines of the original are
   missing; only the visible lines are edited.  */
26100 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
26104 /* Simplify the test for uninteresting insns.  */
26105 if (insn_type != TYPE_SETCC
26106 && insn_type != TYPE_ICMOV
26107 && insn_type != TYPE_FCMOV
26108 && insn_type != TYPE_IBR)
26111 if ((set = single_set (dep_insn)) != 0)
26113 set = SET_DEST (set);
26116 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
26117 && XVECLEN (PATTERN (dep_insn), 0) == 2
26118 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
26119 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
26121 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
/* Fix: SET2 must come from the SECOND SET of the PARALLEL (element 1),
   not element 0 again; otherwise the flags destination was tested twice
   and the other destination register was never checked below.  */
26122 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
26127 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
26130 /* This test is true if the dependent insn reads the flags but
26131 not any other potentially set register.  */
26132 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
26135 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
26141 /* Return true iff USE_INSN has a memory address with operands set by
/* Return true iff USE_INSN has a memory operand whose address uses a
   register modified by SET_INSN (an address-generation-interlock hazard).
   Only the first MEM operand found is inspected.  */
26145 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
26148 extract_insn_cached (use_insn);
26149 for (i = recog_data.n_operands - 1; i >= 0; --i)
26150 if (MEM_P (recog_data.operand[i]))
26152 rtx addr = XEXP (recog_data.operand[i], 0);
26153 return modified_in_p (addr, set_insn) != 0;
26158 /* Helper function for exact_store_load_dependency.
26159 Return true if addr is found in insn. */
/* Helper for exact_store_load_dependency: recursively scan rtx INSN for a
   subexpression rtx_equal_p to ADDR, following 'e' (expression) and 'E'
   (vector) format slots.  NOTE(review): sampled excerpt; the switch
   cases' surrounding lines are missing.  */
26161 exact_dependency_1 (rtx addr, rtx insn)
26163 enum rtx_code code;
26164 const char *format_ptr;
26167 code = GET_CODE (insn);
26171 if (rtx_equal_p (addr, insn))
26186 format_ptr = GET_RTX_FORMAT (code);
26187 for (i = 0; i < GET_RTX_LENGTH (code); i++)
26189 switch (*format_ptr++)
26192 if (exact_dependency_1 (addr, XEXP (insn, i)))
26196 for (j = 0; j < XVECLEN (insn, i); j++)
26197 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
26205 /* Return true if there exists exact dependency for store & load, i.e.
26206 the same memory address is used in them. */
/* Return true if STORE and LOAD form an exact store->load dependency,
   i.e. the store's MEM destination address appears in the load's source
   (a store-forwarding situation).  */
26208 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
26212 set1 = single_set (store);
26215 if (!MEM_P (SET_DEST (set1)))
26217 set2 = single_set (load);
26220 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
/* TARGET_SCHED_ADJUST_COST hook: tweak the scheduler's latency COST of the
   dependence LINK from DEP_INSN to INSN, per tuned microarchitecture
   (AGI stalls, flags pairing, load latency hiding, stack-engine pop/push,
   short-int store-forwarding penalties).  NOTE(review): sampled excerpt;
   interior lines are missing.  */
26226 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26228 enum attr_type insn_type, dep_insn_type;
26229 enum attr_memory memory;
26231 int dep_insn_code_number;
26233 /* Anti and output dependencies have zero cost on all CPUs.  */
26234 if (REG_NOTE_KIND (link) != 0)
26237 dep_insn_code_number = recog_memoized (dep_insn);
26239 /* If we can't recognize the insns, we can't really do anything.  */
26240 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
26243 insn_type = get_attr_type (insn);
26244 dep_insn_type = get_attr_type (dep_insn);
26248 case PROCESSOR_PENTIUM:
26249 /* Address Generation Interlock adds a cycle of latency.  */
26250 if (insn_type == TYPE_LEA)
26252 rtx addr = PATTERN (insn);
26254 if (GET_CODE (addr) == PARALLEL)
26255 addr = XVECEXP (addr, 0, 0);
26257 gcc_assert (GET_CODE (addr) == SET);
26259 addr = SET_SRC (addr);
26260 if (modified_in_p (addr, dep_insn))
26263 else if (ix86_agi_dependent (dep_insn, insn))
26266 /* ??? Compares pair with jump/setcc.  */
26267 if (ix86_flags_dependent (insn, dep_insn, insn_type))
26270 /* Floating point stores require value to be ready one cycle earlier.  */
26271 if (insn_type == TYPE_FMOV
26272 && get_attr_memory (insn) == MEMORY_STORE
26273 && !ix86_agi_dependent (dep_insn, insn))
26277 case PROCESSOR_PENTIUMPRO:
26278 /* INT->FP conversion is expensive.  */
26279 if (get_attr_fp_int_src (dep_insn))
26282 /* There is one cycle extra latency between an FP op and a store.  */
26283 if (insn_type == TYPE_FMOV
26284 && (set = single_set (dep_insn)) != NULL_RTX
26285 && (set2 = single_set (insn)) != NULL_RTX
26286 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
26287 && MEM_P (SET_DEST (set2)))
26290 memory = get_attr_memory (insn);
26292 /* Show ability of reorder buffer to hide latency of load by executing
26293 in parallel with previous instruction in case
26294 previous instruction is not needed to compute the address.  */
26295 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26296 && !ix86_agi_dependent (dep_insn, insn))
26298 /* Claim moves to take one cycle, as core can issue one load
26299 at time and the next load can start cycle later.  */
26300 if (dep_insn_type == TYPE_IMOV
26301 || dep_insn_type == TYPE_FMOV)
26309 /* The esp dependency is resolved before
26310 the instruction is really finished.  */
26311 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26312 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26315 /* INT->FP conversion is expensive.  */
26316 if (get_attr_fp_int_src (dep_insn))
26319 memory = get_attr_memory (insn);
26321 /* Show ability of reorder buffer to hide latency of load by executing
26322 in parallel with previous instruction in case
26323 previous instruction is not needed to compute the address.  */
26324 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26325 && !ix86_agi_dependent (dep_insn, insn))
26327 /* Claim moves to take one cycle, as core can issue one load
26328 at time and the next load can start cycle later.  */
26329 if (dep_insn_type == TYPE_IMOV
26330 || dep_insn_type == TYPE_FMOV)
26339 case PROCESSOR_AMDFAM10:
26340 case PROCESSOR_BDVER1:
26341 case PROCESSOR_BDVER2:
26342 case PROCESSOR_BDVER3:
26343 case PROCESSOR_BDVER4:
26344 case PROCESSOR_BTVER1:
26345 case PROCESSOR_BTVER2:
26346 case PROCESSOR_GENERIC:
26347 /* Stack engine allows to execute push&pop instructions in parallel.  */
26348 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26349 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26353 case PROCESSOR_ATHLON:
26355 memory = get_attr_memory (insn);
26357 /* Show ability of reorder buffer to hide latency of load by executing
26358 in parallel with previous instruction in case
26359 previous instruction is not needed to compute the address.  */
26360 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26361 && !ix86_agi_dependent (dep_insn, insn))
26363 enum attr_unit unit = get_attr_unit (insn);
26366 /* Because of the difference between the length of integer and
26367 floating unit pipeline preparation stages, the memory operands
26368 for floating point are cheaper.
26370 ??? For Athlon the difference is most probably 2.  */
26371 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
26374 loadcost = TARGET_ATHLON ? 2 : 0;
26376 if (cost >= loadcost)
26383 case PROCESSOR_CORE2:
26384 case PROCESSOR_NEHALEM:
26385 case PROCESSOR_SANDYBRIDGE:
26386 case PROCESSOR_HASWELL:
26387 /* Stack engine allows to execute push&pop instructions in parallel.  */
26388 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26389 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26392 memory = get_attr_memory (insn);
26394 /* Show ability of reorder buffer to hide latency of load by executing
26395 in parallel with previous instruction in case
26396 previous instruction is not needed to compute the address.  */
26397 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26398 && !ix86_agi_dependent (dep_insn, insn))
26407 case PROCESSOR_SILVERMONT:
26408 case PROCESSOR_KNL:
26409 case PROCESSOR_INTEL:
26410 if (!reload_completed)
26413 /* Increase cost of integer loads.  */
26414 memory = get_attr_memory (dep_insn);
26415 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26417 enum attr_unit unit = get_attr_unit (dep_insn);
26418 if (unit == UNIT_INTEGER && cost == 1)
26420 if (memory == MEMORY_LOAD)
26424 /* Increase cost of ld/st for short int types only
26425 because of store forwarding issue.  */
26426 rtx set = single_set (dep_insn);
26427 if (set && (GET_MODE (SET_DEST (set)) == QImode
26428 || GET_MODE (SET_DEST (set)) == HImode))
26430 /* Increase cost of store/load insn if exact
26431 dependence exists and it is load insn.  */
26432 enum attr_memory insn_memory = get_attr_memory (insn);
26433 if (insn_memory == MEMORY_LOAD
26434 && exact_store_load_dependency (dep_insn, insn))
26448 /* How many alternative schedules to try. This should be as wide as the
26449 scheduling freedom in the DFA, but no wider. Making this value too
26450 large results extra work for the scheduler. */
/* TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD hook: how many
   alternative schedules max_issue() should try per cycle, tuned per CPU.
   NOTE(review): sampled excerpt; the per-case return values fall in the
   missing lines.  */
26453 ia32_multipass_dfa_lookahead (void)
26457 case PROCESSOR_PENTIUM:
26460 case PROCESSOR_PENTIUMPRO:
26464 case PROCESSOR_BDVER1:
26465 case PROCESSOR_BDVER2:
26466 case PROCESSOR_BDVER3:
26467 case PROCESSOR_BDVER4:
26468 /* We use lookahead value 4 for BD both before and after reload
26469 schedules.  Plan is to have value 8 included for O3.  */
26472 case PROCESSOR_CORE2:
26473 case PROCESSOR_NEHALEM:
26474 case PROCESSOR_SANDYBRIDGE:
26475 case PROCESSOR_HASWELL:
26476 case PROCESSOR_BONNELL:
26477 case PROCESSOR_SILVERMONT:
26478 case PROCESSOR_KNL:
26479 case PROCESSOR_INTEL:
26480 /* Generally, we want haifa-sched:max_issue() to look ahead as far
26481 as many instructions can be executed on a cycle, i.e.,
26482 issue_rate.  I wonder why tuning for many CPUs does not do this.  */
26483 if (reload_completed)
26484 return ix86_issue_rate ();
26485 /* Don't use lookahead for pre-reload schedule to save compile time.  */
26493 /* Return true if target platform supports macro-fusion. */
/* TARGET_SCHED_MACRO_FUSION_P hook: whether the tuned target fuses a
   compare with the following conditional branch.  */
26496 ix86_macro_fusion_p ()
26498 return TARGET_FUSE_CMP_AND_BRANCH;
26501 /* Check whether current microarchitecture support macro fusion
26502 for insn pair "CONDGEN + CONDJMP". Refer to
26503 "Intel Architectures Optimization Reference Manual". */
/* Return whether CONDGEN (a test/cmp/inc/dec/alu flag producer) and
   CONDJMP (a conditional jump) can macro-fuse on the current
   microarchitecture, per the Intel optimization manual rules: no
   MEM+immediate compare, no RIP-relative address, condition-code and
   ALU-vs-CMP restrictions.  NOTE(review): sampled excerpt; interior lines
   are missing.  */
26506 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
26509 enum rtx_code ccode;
26510 rtx compare_set = NULL_RTX, test_if, cond;
26511 rtx alu_set = NULL_RTX, addr = NULL_RTX;
26513 if (!any_condjump_p (condjmp))
26516 if (get_attr_type (condgen) != TYPE_TEST
26517 && get_attr_type (condgen) != TYPE_ICMP
26518 && get_attr_type (condgen) != TYPE_INCDEC
26519 && get_attr_type (condgen) != TYPE_ALU)
26522 compare_set = single_set (condgen);
26523 if (compare_set == NULL_RTX
26524 && !TARGET_FUSE_ALU_AND_BRANCH)
/* CONDGEN may be a PARALLEL of a COMPARE set plus an ALU result set;
   pick out each part.  */
26527 if (compare_set == NULL_RTX)
26530 rtx pat = PATTERN (condgen);
26531 for (i = 0; i < XVECLEN (pat, 0); i++)
26532 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26534 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26535 if (GET_CODE (set_src) == COMPARE)
26536 compare_set = XVECEXP (pat, 0, i);
26538 alu_set = XVECEXP (pat, 0, i);
26541 if (compare_set == NULL_RTX)
26543 src = SET_SRC (compare_set);
26544 if (GET_CODE (src) != COMPARE)
26547 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26549 if ((MEM_P (XEXP (src, 0))
26550 && CONST_INT_P (XEXP (src, 1)))
26551 || (MEM_P (XEXP (src, 1))
26552 && CONST_INT_P (XEXP (src, 0))))
26555 /* No fusion for RIP-relative address.  */
26556 if (MEM_P (XEXP (src, 0)))
26557 addr = XEXP (XEXP (src, 0), 0);
26558 else if (MEM_P (XEXP (src, 1)))
26559 addr = XEXP (XEXP (src, 1), 0);
26562 ix86_address parts;
26563 int ok = ix86_decompose_address (addr, &parts);
26566 if (rip_relative_addr_p (&parts))
26570 test_if = SET_SRC (pc_set (condjmp));
26571 cond = XEXP (test_if, 0);
26572 ccode = GET_CODE (cond);
26573 /* Check whether conditional jump use Sign or Overflow Flags.  */
26574 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26581 /* Return true for TYPE_TEST and TYPE_ICMP.  */
26582 if (get_attr_type (condgen) == TYPE_TEST
26583 || get_attr_type (condgen) == TYPE_ICMP)
26586 /* The following is the case that macro-fusion for alu + jmp.  */
26587 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26590 /* No fusion for alu op with memory destination operand.  */
26591 dest = SET_DEST (alu_set);
26595 /* Macro-fusion for inc/dec + unsigned conditional jump is not
26597 if (get_attr_type (condgen) == TYPE_INCDEC
26607 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
26608 execution. It is applied if
26609 (1) IMUL instruction is on the top of list;
26610 (2) There exists the only producer of independent IMUL instruction in
   the ready list (elided: the original lists the second condition here);
26612 Return index of IMUL producer if it was found and -1 otherwise.
   NOTE(review): interior lines are elided in this extract; code is kept
   byte-identical.  Applies only when TARGET_BONNELL.  */
26614 do_reorder_for_imul (rtx_insn **ready, int n_ready)
26617 rtx set, insn1, insn2;
26618 sd_iterator_def sd_it;
26623 if (!TARGET_BONNELL)
26626 /* Check that IMUL instruction is on the top of ready list. */
26627 insn = ready[n_ready - 1];
26628 set = single_set (insn);
26631 if (!(GET_CODE (SET_SRC (set)) == MULT
26632 && GET_MODE (SET_SRC (set)) == SImode))
26635 /* Search for producer of independent IMUL instruction. */
26636 for (i = n_ready - 2; i >= 0; i--)
26639 if (!NONDEBUG_INSN_P (insn))
26641 /* Skip IMUL instruction. */
26642 insn2 = PATTERN (insn)26642;
26643 if (GET_CODE (insn2) == PARALLEL)
26644 insn2 = XVECEXP (insn2, 0, 0);
26645 if (GET_CODE (insn2) == SET
26646 && GET_CODE (SET_SRC (insn2)) == MULT
26647 && GET_MODE (SET_SRC (insn2)) == SImode)
/* Walk forward dependencies of the candidate producer looking for a
   dependent SImode MULT (an IMUL consumer).  */
26650 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26653 con = DEP_CON (dep);
26654 if (!NONDEBUG_INSN_P (con))
26656 insn1 = PATTERN (con);
26657 if (GET_CODE (insn1) == PARALLEL)
26658 insn1 = XVECEXP (insn1, 0, 0);
26660 if (GET_CODE (insn1) == SET
26661 && GET_CODE (SET_SRC (insn1)) == MULT
26662 && GET_MODE (SET_SRC (insn1)) == SImode)
26664 sd_iterator_def sd_it1;
26666 /* Check if there is no other dependee for IMUL. */
26668 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26671 pro = DEP_PRO (dep1);
26672 if (!NONDEBUG_INSN_P (pro))
26687 /* Try to find the best candidate on the top of ready list if two insns
26688 have the same priority - candidate is best if its dependees were
26689 scheduled earlier. Applied for Silvermont only.
26690 Return true if top 2 insns must be interchanged.
   NOTE(review): interior lines are elided in this extract; code kept
   byte-identical.  */
26692 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26694 rtx_insn *top = ready[n_ready - 1];
26695 rtx_insn *next = ready[n_ready - 2];
26697 sd_iterator_def sd_it;
26701 #define INSN_TICK(INSN) (HID (INSN)->tick)
26703 if (!TARGET_SILVERMONT && !TARGET_INTEL)
/* Both candidates must be real (non-debug) non-jump insns with a
   single SET; anything else bails out (elided returns).  */
26706 if (!NONDEBUG_INSN_P (top))
26708 if (!NONJUMP_INSN_P (top))
26710 if (!NONDEBUG_INSN_P (next))
26712 if (!NONJUMP_INSN_P (next))
26714 set = single_set (top);
26717 set = single_set (next);
26721 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26723 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26725 /* Determine winner more precisely: compare the latest ticks of the
   resolved backward dependencies of each candidate.  */
26726 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26729 pro = DEP_PRO (dep);
26730 if (!NONDEBUG_INSN_P (pro))
26732 if (INSN_TICK (pro) > clock1)
26733 clock1 = INSN_TICK (pro);
26735 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26738 pro = DEP_PRO (dep);
26739 if (!NONDEBUG_INSN_P (pro))
26741 if (INSN_TICK (pro) > clock2)
26742 clock2 = INSN_TICK (pro);
26745 if (clock1 == clock2)
26747 /* Determine winner - load must win. */
26748 enum attr_memory memory1, memory2;
26749 memory1 = get_attr_memory (top);
26750 memory2 = get_attr_memory (next);
26751 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26754 return (bool) (clock2 < clock1);
26760 /* Perform possible reordering of ready list for Atom/Silvermont only.
26761 Return issue rate.
   NOTE(review): interior lines are elided in this extract; code kept
   byte-identical.  */
26763 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26764 int *pn_ready, int clock_var)
26766 int issue_rate = -1;
26767 int n_ready = *pn_ready;
26772 /* Set up issue rate. */
26773 issue_rate = ix86_issue_rate ();
26775 /* Do reordering for BONNELL/SILVERMONT only. */
26776 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26779 /* Nothing to do if ready list contains only 1 instruction. */
26783 /* Do reordering for post-reload scheduler only. */
26784 if (!reload_completed)
26787 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26789 if (sched_verbose > 1)
26790 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26791 INSN_UID (ready[index]));
26793 /* Put IMUL producer (ready[index]) at the top of ready list
   by rotating the tail of the array up one slot.  */
26794 insn = ready[index];
26795 for (i = index; i < n_ready - 1; i++)
26796 ready[i] = ready[i + 1];
26797 ready[n_ready - 1] = insn;
26801 /* Skip selective scheduling since HID is not populated in it. */
26804 && swap_top_of_ready_list (ready, n_ready))
26806 if (sched_verbose > 1)
26807 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26808 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26809 /* Swap 2 top elements of ready list. */
26810 insn = ready[n_ready - 1];
26811 ready[n_ready - 1] = ready[n_ready - 2];
26812 ready[n_ready - 2] = insn;
26818 ix86_class_likely_spilled_p (reg_class_t);
26820 /* Returns true if lhs of insn is HW function argument register and set up
26821 is_spilled to true if it is likely spilled HW register.
   NOTE(review): interior lines (returns/braces) are elided in this
   extract; code kept byte-identical.  */
26823 insn_is_function_arg (rtx insn, bool* is_spilled)
26827 if (!NONDEBUG_INSN_P (insn))
26829 /* Call instructions are not movable, ignore it. */
/* Dig down to the (first) SET of the insn pattern.  */
26832 insn = PATTERN (insn);
26833 if (GET_CODE (insn) == PARALLEL)
26834 insn = XVECEXP (insn, 0, 0);
26835 if (GET_CODE (insn) != SET)
26837 dst = SET_DEST (insn);
26838 if (REG_P (dst) && HARD_REGISTER_P (dst)
26839 && ix86_function_arg_regno_p (REGNO (dst)))
26841 /* Is it likely spilled HW register? */
26842 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26843 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26844 *is_spilled = true;
26850 /* Add output dependencies for chain of function adjacent arguments if only
26851 there is a move to likely spilled HW register. Return first argument
26852 if at least one dependence was added or NULL otherwise.
   NOTE(review): interior lines are elided in this extract; code kept
   byte-identical.  */
26854 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
26857 rtx_insn *last = call;
26858 rtx_insn *first_arg = NULL;
26859 bool is_spilled = false;
26861 head = PREV_INSN (head);
26863 /* Find nearest to call argument passing instruction. */
26866 last = PREV_INSN (last);
26869 if (!NONDEBUG_INSN_P (last))
26871 if (insn_is_function_arg (last, &is_spilled))
/* Walk backwards collecting the chain of adjacent argument moves.  */
26879 insn = PREV_INSN (last);
26880 if (!INSN_P (insn))
26884 if (!NONDEBUG_INSN_P (insn))
26889 if (insn_is_function_arg (insn, &is_spilled))
26891 /* Add output dependence between two function arguments if chain
26892 of output arguments contains likely spilled HW registers. */
26894 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26895 first_arg = last = insn;
26905 /* Add output or anti dependency from insn to first_arg to restrict its code
   motion (elided: original comment continues here).
   NOTE(review): interior lines are elided in this extract; code kept
   byte-identical.  */
26908 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
26913 /* Add anti dependencies for bounds stores (MPX UNSPEC_BNDSTX).  */
26915 && GET_CODE (PATTERN (insn)) == PARALLEL
26916 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
26917 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
26919 add_dependence (first_arg, insn, REG_DEP_ANTI);
26923 set = single_set (insn);
26926 tmp = SET_DEST (set);
26929 /* Add output dependency to the first function argument. */
26930 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26933 /* Add anti dependency. */
26934 add_dependence (first_arg, insn, REG_DEP_ANTI);
26937 /* Avoid cross block motion of function argument through adding dependency
26938 from the first non-jump instruction in bb.
   Scans BB backwards from its end; stops at BB_HEAD (elided lines
   presumably terminate the loop).  */
26940 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
26942 rtx_insn *insn = BB_END (bb);
26946 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26948 rtx set = single_set (insn);
26951 avoid_func_arg_motion (arg, insn);
26955 if (insn == BB_HEAD (bb))
26957 insn = PREV_INSN (insn);
26961 /* Hook for pre-reload schedule - avoid motion of function arguments
26962 passed in likely spilled HW registers.
   NOTE(review): interior lines are elided in this extract; code kept
   byte-identical.  */
26964 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
26967 rtx_insn *first_arg = NULL;
26968 if (reload_completed)
26970 while (head != tail && DEBUG_INSN_P (head))
26971 head = NEXT_INSN (head);
26972 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26973 if (INSN_P (insn) && CALL_P (insn))
26975 first_arg = add_parameter_dependencies (insn, head);
26978 /* Add dependee for first argument to predecessors if only
26979 region contains more than one block. */
26980 basic_block bb = BLOCK_FOR_INSN (insn);
26981 int rgn = CONTAINING_RGN (bb->index);
26982 int nr_blks = RGN_NR_BLOCKS (rgn);
26983 /* Skip trivial regions and region head blocks that can have
26984 predecessors outside of region. */
26985 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26990 /* Regions are SCCs with the exception of selective
26991 scheduling with pipelining of outer blocks enabled.
26992 So also check that immediate predecessors of a non-head
26993 block are in the same region. */
26994 FOR_EACH_EDGE (e, ei, bb->preds)
26996 /* Avoid creating of loop-carried dependencies through
26997 using topological ordering in the region. */
26998 if (rgn == CONTAINING_RGN (e->src->index)
26999 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
27000 add_dependee_for_func_arg (first_arg, e->src);
27008 else if (first_arg)
27009 avoid_func_arg_motion (first_arg, insn);
27012 /* Hook for pre-reload schedule - set priority of moves from likely spilled
27013 HW registers to maximum, to schedule them as soon as possible. These are
27014 moves from function argument registers at the top of the function entry
27015 and moves from function return value registers after call.
   Returns the (possibly adjusted) PRIORITY; elided lines presumably
   return PRIORITY unchanged on the early-out paths.  */
27017 ix86_adjust_priority (rtx_insn *insn, int priority)
27021 if (reload_completed)
27024 if (!NONDEBUG_INSN_P (insn))
27027 set = single_set (insn);
27030 rtx tmp = SET_SRC (set);
27032 && HARD_REGISTER_P (tmp)
27033 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
27034 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
27035 return current_sched_info->sched_max_insns_priority;
27041 /* Model decoder of Core 2/i7.
27042 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
27043 track the instruction fetch block boundaries and make sure that long
27044 (9+ bytes) instructions are assigned to D0. */
27046 /* Maximum length of an insn that can be handled by
27047 a secondary decoder unit. '8' for Core 2/i7. */
27048 static int core2i7_secondary_decoder_max_insn_size;
27050 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
27051 '16' for Core 2/i7. */
27052 static int core2i7_ifetch_block_size;
27054 /* Maximum number of instructions decoder can handle per cycle.
27055 '6' for Core 2/i7. */
27056 static int core2i7_ifetch_block_max_insns;
/* Mutable/const pointer typedefs for the per-round decoder state.  */
27058 typedef struct ix86_first_cycle_multipass_data_ *
27059 ix86_first_cycle_multipass_data_t;
27060 typedef const struct ix86_first_cycle_multipass_data_ *
27061 const_ix86_first_cycle_multipass_data_t;
27063 /* A variable to store target state across calls to max_issue within
   one cycle (elided: original comment continues here).  */
27065 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
27066 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
27068 /* Initialize DATA: zero the ifetch-block counters and leave the
   ready_try change-bitmap unallocated (allocated lazily on issue).  */
27070 core2i7_first_cycle_multipass_init (void *_data)
27072 ix86_first_cycle_multipass_data_t data
27073 = (ix86_first_cycle_multipass_data_t) _data;
27075 data->ifetch_block_len = 0;
27076 data->ifetch_block_n_insns = 0;
27077 data->ready_try_change = NULL;
27078 data->ready_try_change_size = 0;
27081 /* Advancing the cycle; reset ifetch block counts.  Asserts the insn
   count never exceeded the decoder's per-cycle capacity.  */
27083 core2i7_dfa_post_advance_cycle (void)
27085 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
27087 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27089 data->ifetch_block_len = 0;
27090 data->ifetch_block_n_insns = 0;
27093 static int min_insn_size (rtx_insn *);
27095 /* Filter out insns from ready_try that the core will not be able to issue
27096 on current cycle due to decoder.  Masked entries are recorded in
   DATA->ready_try_change so they can be reverted on backtrack.
   NOTE(review): interior lines (loop header, early outs) are elided in
   this extract; code kept byte-identical.  */
27098 core2i7_first_cycle_multipass_filter_ready_try
27099 (const_ix86_first_cycle_multipass_data_t data,
27100 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
27107 if (ready_try[n_ready])
27110 insn = get_ready_element (n_ready);
27111 insn_size = min_insn_size (insn);
27113 if (/* If this is a too long an insn for a secondary decoder ... */
27114 (!first_cycle_insn_p
27115 && insn_size > core2i7_secondary_decoder_max_insn_size)
27116 /* ... or it would not fit into the ifetch block ... */
27117 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
27118 /* ... or the decoder is full already ... */
27119 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
27120 /* ... mask the insn out. */
27122 ready_try[n_ready] = 1;
27124 if (data->ready_try_change)
27125 bitmap_set_bit (data->ready_try_change, n_ready);
27130 /* Prepare for a new round of multipass lookahead scheduling: copy the
   decoder counters from the saved end-of-previous-round state, then
   mask out insns the decoder cannot accept this cycle.  */
27132 core2i7_first_cycle_multipass_begin (void *_data,
27133 signed char *ready_try, int n_ready,
27134 bool first_cycle_insn_p)
27136 ix86_first_cycle_multipass_data_t data
27137 = (ix86_first_cycle_multipass_data_t) _data;
27138 const_ix86_first_cycle_multipass_data_t prev_data
27139 = ix86_first_cycle_multipass_data;
27141 /* Restore the state from the end of the previous round. */
27142 data->ifetch_block_len = prev_data->ifetch_block_len;
27143 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
27145 /* Filter instructions that cannot be issued on current cycle due to
27146 decoder restrictions. */
27147 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27148 first_cycle_insn_p);
27151 /* INSN is being issued in current solution. Account for its impact on
27152 the decoder model: accumulate its size and count into the ifetch
   block, then re-filter the ready list, recording any newly masked
   entries so backtracking can undo them.  */
27154 core2i7_first_cycle_multipass_issue (void *_data,
27155 signed char *ready_try, int n_ready,
27156 rtx_insn *insn, const void *_prev_data)
27158 ix86_first_cycle_multipass_data_t data
27159 = (ix86_first_cycle_multipass_data_t) _data;
27160 const_ix86_first_cycle_multipass_data_t prev_data
27161 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
27163 int insn_size = min_insn_size (insn);
27165 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
27166 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
27167 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
27168 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27170 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
27171 if (!data->ready_try_change)
27173 data->ready_try_change = sbitmap_alloc (n_ready);
27174 data->ready_try_change_size = n_ready;
27176 else if (data->ready_try_change_size < n_ready)
27178 data->ready_try_change = sbitmap_resize (data->ready_try_change,
   n_ready, 0 /* elided args; see original */);
27180 data->ready_try_change_size = n_ready;
27182 bitmap_clear (data->ready_try_change);
27184 /* Filter out insns from ready_try that the core will not be able to issue
27185 on current cycle due to decoder. */
27186 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27190 /* Revert the effect on ready_try: clear every ready_try slot that the
   matching issue step masked (recorded in DATA->ready_try_change).  */
27192 core2i7_first_cycle_multipass_backtrack (const void *_data,
27193 signed char *ready_try,
27194 int n_ready ATTRIBUTE_UNUSED)
27196 const_ix86_first_cycle_multipass_data_t data
27197 = (const_ix86_first_cycle_multipass_data_t) _data;
27198 unsigned int i = 0;
27199 sbitmap_iterator sbi;
27201 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
27202 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
27208 /* Save the result of multipass lookahead scheduling for the next round:
   copy the winning solution's decoder counters into the global state.  */
27210 core2i7_first_cycle_multipass_end (const void *_data)
27212 const_ix86_first_cycle_multipass_data_t data
27213 = (const_ix86_first_cycle_multipass_data_t) _data;
27214 ix86_first_cycle_multipass_data_t next_data
27215 = ix86_first_cycle_multipass_data;
27219 next_data->ifetch_block_len = data->ifetch_block_len;
27220 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
27224 /* Deallocate target data: free the lazily-allocated change-bitmap and
   reset its bookkeeping.  */
27226 core2i7_first_cycle_multipass_fini (void *_data)
27228 ix86_first_cycle_multipass_data_t data
27229 = (ix86_first_cycle_multipass_data_t) _data;
27231 if (data->ready_try_change)
27233 sbitmap_free (data->ready_try_change);
27234 data->ready_try_change = NULL;
27235 data->ready_try_change_size = 0;
27239 /* Prepare for scheduling pass: install or clear the Core2/i7 decoder
   hooks depending on the tuned-for processor.
   NOTE(review): the switch header and closing lines are elided in this
   extract; code kept byte-identical.  */
27241 ix86_sched_init_global (FILE *, int, int)
27243 /* Install scheduling hooks for current CPU. Some of these hooks are used
27244 in time-critical parts of the scheduler, so we only set them up when
27245 they are actually used. */
27248 case PROCESSOR_CORE2:
27249 case PROCESSOR_NEHALEM:
27250 case PROCESSOR_SANDYBRIDGE:
27251 case PROCESSOR_HASWELL:
27252 /* Do not perform multipass scheduling for pre-reload schedule
27253 to save compile time. */
27254 if (reload_completed)
27256 targetm.sched.dfa_post_advance_cycle
27257 = core2i7_dfa_post_advance_cycle;
27258 targetm.sched.first_cycle_multipass_init
27259 = core2i7_first_cycle_multipass_init;
27260 targetm.sched.first_cycle_multipass_begin
27261 = core2i7_first_cycle_multipass_begin;
27262 targetm.sched.first_cycle_multipass_issue
27263 = core2i7_first_cycle_multipass_issue;
27264 targetm.sched.first_cycle_multipass_backtrack
27265 = core2i7_first_cycle_multipass_backtrack;
27266 targetm.sched.first_cycle_multipass_end
27267 = core2i7_first_cycle_multipass_end;
27268 targetm.sched.first_cycle_multipass_fini
27269 = core2i7_first_cycle_multipass_fini;
27271 /* Set decoder parameters. */
27272 core2i7_secondary_decoder_max_insn_size = 8;
27273 core2i7_ifetch_block_size = 16;
27274 core2i7_ifetch_block_max_insns = 6;
27277 /* ... Fall through ... */
/* Default: no decoder modeling -- clear all the hooks.  */
27279 targetm.sched.dfa_post_advance_cycle = NULL;
27280 targetm.sched.first_cycle_multipass_init = NULL;
27281 targetm.sched.first_cycle_multipass_begin = NULL;
27282 targetm.sched.first_cycle_multipass_issue = NULL;
27283 targetm.sched.first_cycle_multipass_backtrack = NULL;
27284 targetm.sched.first_cycle_multipass_end = NULL;
27285 targetm.sched.first_cycle_multipass_fini = NULL;
27291 /* Compute the alignment given to a constant that is being placed in memory.
27292 EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have (elided line).
27294 The value of this function is used instead of that alignment to align
   the object (elided line).  Bumps DFmode constants to 64 bits, 128-bit
   modes to 128, and long strings to word alignment when not -Os.  */
27298 ix86_constant_alignment (tree exp, int align)
27300 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
27301 || TREE_CODE (exp) == INTEGER_CST)
27303 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
27305 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
27308 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
27309 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
27310 return BITS_PER_WORD;
27315 /* Compute the alignment for a static variable.
27316 TYPE is the data type, and ALIGN is the alignment that
27317 the object would ordinarily have. The value of this function is used
27318 instead of that alignment to align the object.
   NOTE(review): interior lines are elided in this extract; code kept
   byte-identical.  */
27321 ix86_data_alignment (tree type, int align, bool opt)
27323 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
27324 for symbols from other compilation units or symbols that don't need
27325 to bind locally. In order to preserve some ABI compatibility with
27326 those compilers, ensure we don't decrease alignment from what we
   used to assume (elided line).  */
27329 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
27331 /* A data structure, equal or greater than the size of a cache line
27332 (64 bytes in the Pentium 4 and other recent Intel processors, including
27333 processors based on Intel Core microarchitecture) should be aligned
27334 so that its base address is a multiple of a cache line size. */
27337 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
27339 if (max_align < BITS_PER_WORD)
27340 max_align = BITS_PER_WORD;
/* -malign-data= policy: "abi" disables the opt-driven bumps, "compat"
   caps at word alignment, "cacheline" keeps the default.  */
27342 switch (ix86_align_data_type)
27344 case ix86_align_data_type_abi: opt = false; break;
27345 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
27346 case ix86_align_data_type_cacheline: break;
27350 && AGGREGATE_TYPE_P (type)
27351 && TYPE_SIZE (type)
27352 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
27354 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
27355 && align < max_align_compat)
27356 align = max_align_compat;
27357 if (wi::geu_p (TYPE_SIZE (type), max_align)
27358 && align < max_align)
27362 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
27363 to 16byte boundary. */
27366 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
27367 && TYPE_SIZE (type)
27368 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27369 && wi::geu_p (TYPE_SIZE (type), 128)
/* Per-type-category minimum alignments (DFmode -> 64, 128-bit modes
   -> 128); elided return statements presumably supply the values.  */
27377 if (TREE_CODE (type) == ARRAY_TYPE)
27379 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27381 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27384 else if (TREE_CODE (type) == COMPLEX_TYPE)
27387 if (TYPE_MODE (type) == DCmode && align < 64)
27389 if ((TYPE_MODE (type) == XCmode
27390 || TYPE_MODE (type) == TCmode) && align < 128)
27393 else if ((TREE_CODE (type) == RECORD_TYPE
27394 || TREE_CODE (type) == UNION_TYPE
27395 || TREE_CODE (type) == QUAL_UNION_TYPE)
27396 && TYPE_FIELDS (type))
27398 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27400 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27403 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27404 || TREE_CODE (type) == INTEGER_TYPE)
27406 if (TYPE_MODE (type) == DFmode && align < 64)
27408 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27415 /* Compute the alignment for a local variable or a stack slot. EXP is
27416 the data type or decl itself, MODE is the widest mode available and
27417 ALIGN is the alignment that the object would ordinarily have. The
27418 value of this macro is used instead of that alignment to align the
   object (elided line).
   NOTE(review): interior lines are elided in this extract; code kept
   byte-identical.  */
27422 ix86_local_alignment (tree exp, machine_mode mode,
27423 unsigned int align)
27427 if (exp && DECL_P (exp))
27429 type = TREE_TYPE (exp);
27438 /* Don't do dynamic stack realignment for long long objects with
27439 -mpreferred-stack-boundary=2. */
27442 && ix86_preferred_stack_boundary < 64
27443 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
27444 && (!type || !TYPE_USER_ALIGN (type))
27445 && (!decl || !DECL_USER_ALIGN (decl)))
27448 /* If TYPE is NULL, we are allocating a stack slot for caller-save
27449 register in MODE. We will return the largest alignment of XF
   and DF (elided line).  */
27453 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
27454 align = GET_MODE_ALIGNMENT (DFmode);
27458 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
27459 to 16byte boundary. Exact wording is:
27461 An array uses the same alignment as its elements, except that a local or
27462 global array variable of length at least 16 bytes or
27463 a C99 variable-length array variable always has alignment of at least 16 bytes.
27465 This was added to allow use of aligned SSE instructions at arrays. This
27466 rule is meant for static storage (where compiler can not do the analysis
27467 by itself). We follow it for automatic variables only when convenient.
27468 We fully control everything in the function compiled and functions from
27469 other unit can not rely on the alignment.
27471 Exclude va_list type. It is the common case of local array where
27472 we can not benefit from the alignment.
27474 TODO: Probably one should optimize for size only when var is not escaping. */
27475 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
27478 if (AGGREGATE_TYPE_P (type)
27479 && (va_list_type_node == NULL_TREE
27480 || (TYPE_MAIN_VARIANT (type)
27481 != TYPE_MAIN_VARIANT (va_list_type_node)))
27482 && TYPE_SIZE (type)
27483 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27484 && wi::geu_p (TYPE_SIZE (type), 16)
/* Per-type-category minimum alignments, mirroring ix86_data_alignment.  */
27488 if (TREE_CODE (type) == ARRAY_TYPE)
27490 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27492 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27495 else if (TREE_CODE (type) == COMPLEX_TYPE)
27497 if (TYPE_MODE (type) == DCmode && align < 64)
27499 if ((TYPE_MODE (type) == XCmode
27500 || TYPE_MODE (type) == TCmode) && align < 128)
27503 else if ((TREE_CODE (type) == RECORD_TYPE
27504 || TREE_CODE (type) == UNION_TYPE
27505 || TREE_CODE (type) == QUAL_UNION_TYPE)
27506 && TYPE_FIELDS (type))
27508 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27510 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27513 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27514 || TREE_CODE (type) == INTEGER_TYPE)
27517 if (TYPE_MODE (type) == DFmode && align < 64)
27519 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27525 /* Compute the minimum required alignment for dynamic stack realignment
27526 purposes for a local variable, parameter or a stack slot. EXP is
27527 the data type or decl itself, MODE is its mode and ALIGN is the
27528 alignment that the object would ordinarily have.
   NOTE(review): the return statements are elided in this extract;
   code kept byte-identical.  */
27531 ix86_minimum_alignment (tree exp, machine_mode mode,
27532 unsigned int align)
27536 if (exp && DECL_P (exp))
27538 type = TREE_TYPE (exp);
27547 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
27550 /* Don't do dynamic stack realignment for long long objects with
27551 -mpreferred-stack-boundary=2. */
27552 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27553 && (!type || !TYPE_USER_ALIGN (type))
27554 && (!decl || !DECL_USER_ALIGN (decl)))
27560 /* Find a location for the static chain incoming to a nested function.
27561 This is a register, unless all free registers are used by arguments.
   64-bit: always R10.  32-bit: ECX by default, EAX for fastcall/
   thiscall, and a stack slot for regparm-3 functions.
   NOTE(review): interior lines are elided in this extract; code kept
   byte-identical.  */
27564 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
27568 /* While this function won't be called by the middle-end when a static
27569 chain isn't needed, it's also used throughout the backend so it's
27570 easiest to keep this check centralized. */
27571 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
27576 /* We always use R10 in 64-bit mode. */
27581 const_tree fntype, fndecl;
27584 /* By default in 32-bit mode we use ECX to pass the static chain. */
27587 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
27589 fntype = TREE_TYPE (fndecl_or_type);
27590 fndecl = fndecl_or_type;
27594 fntype = fndecl_or_type;
27598 ccvt = ix86_get_callcvt (fntype);
27599 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27601 /* Fastcall functions use ecx/edx for arguments, which leaves
27602 us with EAX for the static chain.
27603 Thiscall functions use ecx for arguments, which also
27604 leaves us with EAX for the static chain. */
27607 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27609 /* Thiscall functions use ecx for arguments, which leaves
27610 us with EAX and EDX for the static chain.
27611 We are using for abi-compatibility EAX. */
27614 else if (ix86_function_regparm (fntype, fndecl) == 3)
27616 /* For regparm 3, we have no free call-clobbered registers in
27617 which to store the static chain. In order to implement this,
27618 we have the trampoline push the static chain to the stack.
27619 However, we can't push a value below the return address when
27620 we call the nested function directly, so we have to use an
27621 alternate entry point. For this we use ESI, and have the
27622 alternate entry point push ESI, so that things appear the
27623 same once we're executing the nested function. */
27626 if (fndecl == current_function_decl)
27627 ix86_static_chain_on_stack = true;
27628 return gen_frame_mem (SImode,
27629 plus_constant (Pmode,
27630 arg_pointer_rtx, -8));
27636 return gen_rtx_REG (Pmode, regno);
27639 /* Emit RTL insns to initialize the variable parts of a trampoline.
27640 FNDECL is the decl of the target address; M_TRAMP is a MEM for
27641 the trampoline, and CHAIN_VALUE is an RTX for the static chain
27642 to be passed to the target function.
   64-bit: emit mov-imm to r11 (target) and r10 (chain) plus jmp *r11.
   32-bit: emit mov-imm to the chain register (or a push) plus a
   relative jmp to the target.
   NOTE(review): interior lines are elided in this extract; code kept
   byte-identical.  */
27645 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27651 fnaddr = XEXP (DECL_RTL (fndecl), 0);
27657 /* Load the function address to r11. Try to load address using
27658 the shorter movl instead of movabs. We may want to support
27659 movq for kernel mode, but kernel does not use trampolines at
27660 the moment. FNADDR is a 32bit address and may not be in
27661 DImode when ptr_mode == SImode. Always use movl in this
   case (elided line).  */
27663 if (ptr_mode == SImode
27664 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27666 fnaddr = copy_addr_to_reg (fnaddr);
/* 0x41 0xbb = REX.B + movl imm32, %r11d.  */
27668 mem = adjust_address (m_tramp, HImode, offset);
27669 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27671 mem = adjust_address (m_tramp, SImode, offset + 2);
27672 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
/* 0x49 0xbb = REX.WB + movabs imm64, %r11.  */
27677 mem = adjust_address (m_tramp, HImode, offset);
27678 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27680 mem = adjust_address (m_tramp, DImode, offset + 2);
27681 emit_move_insn (mem, fnaddr);
27685 /* Load static chain using movabs to r10. Use the shorter movl
27686 instead of movabs when ptr_mode == SImode. */
27687 if (ptr_mode == SImode)
27698 mem = adjust_address (m_tramp, HImode, offset);
27699 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27701 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27702 emit_move_insn (mem, chain_value);
27705 /* Jump to r11; the last (unused) byte is a nop, only there to
27706 pad the write out to a single 32-bit store. */
27707 mem = adjust_address (m_tramp, SImode, offset);
27708 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27715 /* Depending on the static chain location, either load a register
27716 with a constant, or push the constant to the stack. All of the
27717 instructions are the same size. */
27718 chain = ix86_static_chain (fndecl, true);
27721 switch (REGNO (chain))
/* 0xb8+reg = movl imm32 into EAX (0xb8) / ECX (0xb9).  */
27724 opcode = 0xb8; break;
27726 opcode = 0xb9; break;
27728 gcc_unreachable ();
27734 mem = adjust_address (m_tramp, QImode, offset);
27735 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27737 mem = adjust_address (m_tramp, SImode, offset + 1);
27738 emit_move_insn (mem, chain_value);
/* 0xe9 = jmp rel32.  */
27741 mem = adjust_address (m_tramp, QImode, offset);
27742 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27744 mem = adjust_address (m_tramp, SImode, offset + 1);
27746 /* Compute offset from the end of the jmp to the target function.
27747 In the case in which the trampoline stores the static chain on
27748 the stack, we need to skip the first insn which pushes the
27749 (call-saved) register static chain; this push is 1 byte. */
27751 disp = expand_binop (SImode, sub_optab, fnaddr,
27752 plus_constant (Pmode, XEXP (m_tramp, 0),
27753 offset - (MEM_P (chain) ? 1 : 0)),
27754 NULL_RTX, 1, OPTAB_DIRECT);
27755 emit_move_insn (mem, disp);
27758 gcc_assert (offset <= TRAMPOLINE_SIZE);
27760 #ifdef HAVE_ENABLE_EXECUTE_STACK
27761 #ifdef CHECK_EXECUTE_STACK_ENABLED
27762 if (CHECK_EXECUTE_STACK_ENABLED)
27764 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27765 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27769 /* The following file contains several enumerations and data structures
27770 built from the definitions in i386-builtin-types.def. */
27772 #include "i386-builtin-types.inc"
27774 /* Table for the ix86 builtin non-function types. */
27775 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27777 /* Retrieve an element from the above table, building some of
27778 the types lazily.  Vector types (codes up to IX86_BT_LAST_VECT) are
   built from their base type and mode; pointer types from their
   pointee, const-qualified past IX86_BT_LAST_PTR.  Results are memoized
   in ix86_builtin_type_tab.  */
27781 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27783 unsigned int index;
27786 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27788 type = ix86_builtin_type_tab[(int) tcode];
27792 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27793 if (tcode <= IX86_BT_LAST_VECT)
27797 index = tcode - IX86_BT_LAST_PRIM - 1;
27798 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27799 mode = ix86_builtin_type_vect_mode[index];
27801 type = build_vector_type_for_mode (itype, mode);
27807 index = tcode - IX86_BT_LAST_VECT - 1;
27808 if (tcode <= IX86_BT_LAST_PTR)
27809 quals = TYPE_UNQUALIFIED;
27811 quals = TYPE_QUAL_CONST;
27813 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27814 if (quals != TYPE_UNQUALIFIED)
27815 itype = build_qualified_type (itype, quals);
27817 type = build_pointer_type (itype);
27820 ix86_builtin_type_tab[(int) tcode] = type;
27824 /* Table for the ix86 builtin function types. */
27825 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27827 /* Retrieve an element from the above table, building some of
27828 the types lazily.  Given a builtin function-type code TCODE,
   return the corresponding FUNCTION_TYPE tree, built on first use
   from the argument tables generated by i386-builtin-types.def and
   memoized in ix86_builtin_func_type_tab.
   NOTE(review): this listing is elided (the embedded original line
   numbering skips values), so local declarations, braces and the
   cache-hit early-return path are not visible here.  */
27831 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
/* TCODE must be a valid index into the memoization table.  */
27835 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
/* Consult the cache first.  */
27837 type = ix86_builtin_func_type_tab[(int) tcode];
27841 if (tcode <= IX86_BT_LAST_FUNC)
/* Direct function types: ix86_builtin_func_args[start .. after-1]
   holds the return-type code followed by the argument-type codes.  */
27843 unsigned start = ix86_builtin_func_start[(int) tcode];
27844 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27845 tree rtype, atype, args = void_list_node;
27848 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
/* Walk the argument codes backwards so the tree_cons list comes out
   in declaration order, terminated by void_list_node.  */
27849 for (i = after - 1; i > start; --i)
27851 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27852 args = tree_cons (NULL, atype, args);
27855 type = build_function_type (rtype, args);
/* Alias codes above IX86_BT_LAST_FUNC share the type of their base
   code; resolve recursively.  */
27859 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27860 enum ix86_builtin_func_type icode;
27862 icode = ix86_builtin_func_alias_base[index];
27863 type = ix86_get_builtin_func_type (icode);
/* Memoize the result for subsequent lookups.  */
27866 ix86_builtin_func_type_tab[(int) tcode] = type;
27871 /* Codes for all the SSE/MMX builtins. */
27874 IX86_BUILTIN_ADDPS,
27875 IX86_BUILTIN_ADDSS,
27876 IX86_BUILTIN_DIVPS,
27877 IX86_BUILTIN_DIVSS,
27878 IX86_BUILTIN_MULPS,
27879 IX86_BUILTIN_MULSS,
27880 IX86_BUILTIN_SUBPS,
27881 IX86_BUILTIN_SUBSS,
27883 IX86_BUILTIN_CMPEQPS,
27884 IX86_BUILTIN_CMPLTPS,
27885 IX86_BUILTIN_CMPLEPS,
27886 IX86_BUILTIN_CMPGTPS,
27887 IX86_BUILTIN_CMPGEPS,
27888 IX86_BUILTIN_CMPNEQPS,
27889 IX86_BUILTIN_CMPNLTPS,
27890 IX86_BUILTIN_CMPNLEPS,
27891 IX86_BUILTIN_CMPNGTPS,
27892 IX86_BUILTIN_CMPNGEPS,
27893 IX86_BUILTIN_CMPORDPS,
27894 IX86_BUILTIN_CMPUNORDPS,
27895 IX86_BUILTIN_CMPEQSS,
27896 IX86_BUILTIN_CMPLTSS,
27897 IX86_BUILTIN_CMPLESS,
27898 IX86_BUILTIN_CMPNEQSS,
27899 IX86_BUILTIN_CMPNLTSS,
27900 IX86_BUILTIN_CMPNLESS,
27901 IX86_BUILTIN_CMPORDSS,
27902 IX86_BUILTIN_CMPUNORDSS,
27904 IX86_BUILTIN_COMIEQSS,
27905 IX86_BUILTIN_COMILTSS,
27906 IX86_BUILTIN_COMILESS,
27907 IX86_BUILTIN_COMIGTSS,
27908 IX86_BUILTIN_COMIGESS,
27909 IX86_BUILTIN_COMINEQSS,
27910 IX86_BUILTIN_UCOMIEQSS,
27911 IX86_BUILTIN_UCOMILTSS,
27912 IX86_BUILTIN_UCOMILESS,
27913 IX86_BUILTIN_UCOMIGTSS,
27914 IX86_BUILTIN_UCOMIGESS,
27915 IX86_BUILTIN_UCOMINEQSS,
27917 IX86_BUILTIN_CVTPI2PS,
27918 IX86_BUILTIN_CVTPS2PI,
27919 IX86_BUILTIN_CVTSI2SS,
27920 IX86_BUILTIN_CVTSI642SS,
27921 IX86_BUILTIN_CVTSS2SI,
27922 IX86_BUILTIN_CVTSS2SI64,
27923 IX86_BUILTIN_CVTTPS2PI,
27924 IX86_BUILTIN_CVTTSS2SI,
27925 IX86_BUILTIN_CVTTSS2SI64,
27927 IX86_BUILTIN_MAXPS,
27928 IX86_BUILTIN_MAXSS,
27929 IX86_BUILTIN_MINPS,
27930 IX86_BUILTIN_MINSS,
27932 IX86_BUILTIN_LOADUPS,
27933 IX86_BUILTIN_STOREUPS,
27934 IX86_BUILTIN_MOVSS,
27936 IX86_BUILTIN_MOVHLPS,
27937 IX86_BUILTIN_MOVLHPS,
27938 IX86_BUILTIN_LOADHPS,
27939 IX86_BUILTIN_LOADLPS,
27940 IX86_BUILTIN_STOREHPS,
27941 IX86_BUILTIN_STORELPS,
27943 IX86_BUILTIN_MASKMOVQ,
27944 IX86_BUILTIN_MOVMSKPS,
27945 IX86_BUILTIN_PMOVMSKB,
27947 IX86_BUILTIN_MOVNTPS,
27948 IX86_BUILTIN_MOVNTQ,
27950 IX86_BUILTIN_LOADDQU,
27951 IX86_BUILTIN_STOREDQU,
27953 IX86_BUILTIN_PACKSSWB,
27954 IX86_BUILTIN_PACKSSDW,
27955 IX86_BUILTIN_PACKUSWB,
27957 IX86_BUILTIN_PADDB,
27958 IX86_BUILTIN_PADDW,
27959 IX86_BUILTIN_PADDD,
27960 IX86_BUILTIN_PADDQ,
27961 IX86_BUILTIN_PADDSB,
27962 IX86_BUILTIN_PADDSW,
27963 IX86_BUILTIN_PADDUSB,
27964 IX86_BUILTIN_PADDUSW,
27965 IX86_BUILTIN_PSUBB,
27966 IX86_BUILTIN_PSUBW,
27967 IX86_BUILTIN_PSUBD,
27968 IX86_BUILTIN_PSUBQ,
27969 IX86_BUILTIN_PSUBSB,
27970 IX86_BUILTIN_PSUBSW,
27971 IX86_BUILTIN_PSUBUSB,
27972 IX86_BUILTIN_PSUBUSW,
27975 IX86_BUILTIN_PANDN,
27979 IX86_BUILTIN_PAVGB,
27980 IX86_BUILTIN_PAVGW,
27982 IX86_BUILTIN_PCMPEQB,
27983 IX86_BUILTIN_PCMPEQW,
27984 IX86_BUILTIN_PCMPEQD,
27985 IX86_BUILTIN_PCMPGTB,
27986 IX86_BUILTIN_PCMPGTW,
27987 IX86_BUILTIN_PCMPGTD,
27989 IX86_BUILTIN_PMADDWD,
27991 IX86_BUILTIN_PMAXSW,
27992 IX86_BUILTIN_PMAXUB,
27993 IX86_BUILTIN_PMINSW,
27994 IX86_BUILTIN_PMINUB,
27996 IX86_BUILTIN_PMULHUW,
27997 IX86_BUILTIN_PMULHW,
27998 IX86_BUILTIN_PMULLW,
28000 IX86_BUILTIN_PSADBW,
28001 IX86_BUILTIN_PSHUFW,
28003 IX86_BUILTIN_PSLLW,
28004 IX86_BUILTIN_PSLLD,
28005 IX86_BUILTIN_PSLLQ,
28006 IX86_BUILTIN_PSRAW,
28007 IX86_BUILTIN_PSRAD,
28008 IX86_BUILTIN_PSRLW,
28009 IX86_BUILTIN_PSRLD,
28010 IX86_BUILTIN_PSRLQ,
28011 IX86_BUILTIN_PSLLWI,
28012 IX86_BUILTIN_PSLLDI,
28013 IX86_BUILTIN_PSLLQI,
28014 IX86_BUILTIN_PSRAWI,
28015 IX86_BUILTIN_PSRADI,
28016 IX86_BUILTIN_PSRLWI,
28017 IX86_BUILTIN_PSRLDI,
28018 IX86_BUILTIN_PSRLQI,
28020 IX86_BUILTIN_PUNPCKHBW,
28021 IX86_BUILTIN_PUNPCKHWD,
28022 IX86_BUILTIN_PUNPCKHDQ,
28023 IX86_BUILTIN_PUNPCKLBW,
28024 IX86_BUILTIN_PUNPCKLWD,
28025 IX86_BUILTIN_PUNPCKLDQ,
28027 IX86_BUILTIN_SHUFPS,
28029 IX86_BUILTIN_RCPPS,
28030 IX86_BUILTIN_RCPSS,
28031 IX86_BUILTIN_RSQRTPS,
28032 IX86_BUILTIN_RSQRTPS_NR,
28033 IX86_BUILTIN_RSQRTSS,
28034 IX86_BUILTIN_RSQRTF,
28035 IX86_BUILTIN_SQRTPS,
28036 IX86_BUILTIN_SQRTPS_NR,
28037 IX86_BUILTIN_SQRTSS,
28039 IX86_BUILTIN_UNPCKHPS,
28040 IX86_BUILTIN_UNPCKLPS,
28042 IX86_BUILTIN_ANDPS,
28043 IX86_BUILTIN_ANDNPS,
28045 IX86_BUILTIN_XORPS,
28048 IX86_BUILTIN_LDMXCSR,
28049 IX86_BUILTIN_STMXCSR,
28050 IX86_BUILTIN_SFENCE,
28052 IX86_BUILTIN_FXSAVE,
28053 IX86_BUILTIN_FXRSTOR,
28054 IX86_BUILTIN_FXSAVE64,
28055 IX86_BUILTIN_FXRSTOR64,
28057 IX86_BUILTIN_XSAVE,
28058 IX86_BUILTIN_XRSTOR,
28059 IX86_BUILTIN_XSAVE64,
28060 IX86_BUILTIN_XRSTOR64,
28062 IX86_BUILTIN_XSAVEOPT,
28063 IX86_BUILTIN_XSAVEOPT64,
28065 IX86_BUILTIN_XSAVEC,
28066 IX86_BUILTIN_XSAVEC64,
28068 IX86_BUILTIN_XSAVES,
28069 IX86_BUILTIN_XRSTORS,
28070 IX86_BUILTIN_XSAVES64,
28071 IX86_BUILTIN_XRSTORS64,
28073 /* 3DNow! Original */
28074 IX86_BUILTIN_FEMMS,
28075 IX86_BUILTIN_PAVGUSB,
28076 IX86_BUILTIN_PF2ID,
28077 IX86_BUILTIN_PFACC,
28078 IX86_BUILTIN_PFADD,
28079 IX86_BUILTIN_PFCMPEQ,
28080 IX86_BUILTIN_PFCMPGE,
28081 IX86_BUILTIN_PFCMPGT,
28082 IX86_BUILTIN_PFMAX,
28083 IX86_BUILTIN_PFMIN,
28084 IX86_BUILTIN_PFMUL,
28085 IX86_BUILTIN_PFRCP,
28086 IX86_BUILTIN_PFRCPIT1,
28087 IX86_BUILTIN_PFRCPIT2,
28088 IX86_BUILTIN_PFRSQIT1,
28089 IX86_BUILTIN_PFRSQRT,
28090 IX86_BUILTIN_PFSUB,
28091 IX86_BUILTIN_PFSUBR,
28092 IX86_BUILTIN_PI2FD,
28093 IX86_BUILTIN_PMULHRW,
28095 /* 3DNow! Athlon Extensions */
28096 IX86_BUILTIN_PF2IW,
28097 IX86_BUILTIN_PFNACC,
28098 IX86_BUILTIN_PFPNACC,
28099 IX86_BUILTIN_PI2FW,
28100 IX86_BUILTIN_PSWAPDSI,
28101 IX86_BUILTIN_PSWAPDSF,
28104 IX86_BUILTIN_ADDPD,
28105 IX86_BUILTIN_ADDSD,
28106 IX86_BUILTIN_DIVPD,
28107 IX86_BUILTIN_DIVSD,
28108 IX86_BUILTIN_MULPD,
28109 IX86_BUILTIN_MULSD,
28110 IX86_BUILTIN_SUBPD,
28111 IX86_BUILTIN_SUBSD,
28113 IX86_BUILTIN_CMPEQPD,
28114 IX86_BUILTIN_CMPLTPD,
28115 IX86_BUILTIN_CMPLEPD,
28116 IX86_BUILTIN_CMPGTPD,
28117 IX86_BUILTIN_CMPGEPD,
28118 IX86_BUILTIN_CMPNEQPD,
28119 IX86_BUILTIN_CMPNLTPD,
28120 IX86_BUILTIN_CMPNLEPD,
28121 IX86_BUILTIN_CMPNGTPD,
28122 IX86_BUILTIN_CMPNGEPD,
28123 IX86_BUILTIN_CMPORDPD,
28124 IX86_BUILTIN_CMPUNORDPD,
28125 IX86_BUILTIN_CMPEQSD,
28126 IX86_BUILTIN_CMPLTSD,
28127 IX86_BUILTIN_CMPLESD,
28128 IX86_BUILTIN_CMPNEQSD,
28129 IX86_BUILTIN_CMPNLTSD,
28130 IX86_BUILTIN_CMPNLESD,
28131 IX86_BUILTIN_CMPORDSD,
28132 IX86_BUILTIN_CMPUNORDSD,
28134 IX86_BUILTIN_COMIEQSD,
28135 IX86_BUILTIN_COMILTSD,
28136 IX86_BUILTIN_COMILESD,
28137 IX86_BUILTIN_COMIGTSD,
28138 IX86_BUILTIN_COMIGESD,
28139 IX86_BUILTIN_COMINEQSD,
28140 IX86_BUILTIN_UCOMIEQSD,
28141 IX86_BUILTIN_UCOMILTSD,
28142 IX86_BUILTIN_UCOMILESD,
28143 IX86_BUILTIN_UCOMIGTSD,
28144 IX86_BUILTIN_UCOMIGESD,
28145 IX86_BUILTIN_UCOMINEQSD,
28147 IX86_BUILTIN_MAXPD,
28148 IX86_BUILTIN_MAXSD,
28149 IX86_BUILTIN_MINPD,
28150 IX86_BUILTIN_MINSD,
28152 IX86_BUILTIN_ANDPD,
28153 IX86_BUILTIN_ANDNPD,
28155 IX86_BUILTIN_XORPD,
28157 IX86_BUILTIN_SQRTPD,
28158 IX86_BUILTIN_SQRTSD,
28160 IX86_BUILTIN_UNPCKHPD,
28161 IX86_BUILTIN_UNPCKLPD,
28163 IX86_BUILTIN_SHUFPD,
28165 IX86_BUILTIN_LOADUPD,
28166 IX86_BUILTIN_STOREUPD,
28167 IX86_BUILTIN_MOVSD,
28169 IX86_BUILTIN_LOADHPD,
28170 IX86_BUILTIN_LOADLPD,
28172 IX86_BUILTIN_CVTDQ2PD,
28173 IX86_BUILTIN_CVTDQ2PS,
28175 IX86_BUILTIN_CVTPD2DQ,
28176 IX86_BUILTIN_CVTPD2PI,
28177 IX86_BUILTIN_CVTPD2PS,
28178 IX86_BUILTIN_CVTTPD2DQ,
28179 IX86_BUILTIN_CVTTPD2PI,
28181 IX86_BUILTIN_CVTPI2PD,
28182 IX86_BUILTIN_CVTSI2SD,
28183 IX86_BUILTIN_CVTSI642SD,
28185 IX86_BUILTIN_CVTSD2SI,
28186 IX86_BUILTIN_CVTSD2SI64,
28187 IX86_BUILTIN_CVTSD2SS,
28188 IX86_BUILTIN_CVTSS2SD,
28189 IX86_BUILTIN_CVTTSD2SI,
28190 IX86_BUILTIN_CVTTSD2SI64,
28192 IX86_BUILTIN_CVTPS2DQ,
28193 IX86_BUILTIN_CVTPS2PD,
28194 IX86_BUILTIN_CVTTPS2DQ,
28196 IX86_BUILTIN_MOVNTI,
28197 IX86_BUILTIN_MOVNTI64,
28198 IX86_BUILTIN_MOVNTPD,
28199 IX86_BUILTIN_MOVNTDQ,
28201 IX86_BUILTIN_MOVQ128,
28204 IX86_BUILTIN_MASKMOVDQU,
28205 IX86_BUILTIN_MOVMSKPD,
28206 IX86_BUILTIN_PMOVMSKB128,
28208 IX86_BUILTIN_PACKSSWB128,
28209 IX86_BUILTIN_PACKSSDW128,
28210 IX86_BUILTIN_PACKUSWB128,
28212 IX86_BUILTIN_PADDB128,
28213 IX86_BUILTIN_PADDW128,
28214 IX86_BUILTIN_PADDD128,
28215 IX86_BUILTIN_PADDQ128,
28216 IX86_BUILTIN_PADDSB128,
28217 IX86_BUILTIN_PADDSW128,
28218 IX86_BUILTIN_PADDUSB128,
28219 IX86_BUILTIN_PADDUSW128,
28220 IX86_BUILTIN_PSUBB128,
28221 IX86_BUILTIN_PSUBW128,
28222 IX86_BUILTIN_PSUBD128,
28223 IX86_BUILTIN_PSUBQ128,
28224 IX86_BUILTIN_PSUBSB128,
28225 IX86_BUILTIN_PSUBSW128,
28226 IX86_BUILTIN_PSUBUSB128,
28227 IX86_BUILTIN_PSUBUSW128,
28229 IX86_BUILTIN_PAND128,
28230 IX86_BUILTIN_PANDN128,
28231 IX86_BUILTIN_POR128,
28232 IX86_BUILTIN_PXOR128,
28234 IX86_BUILTIN_PAVGB128,
28235 IX86_BUILTIN_PAVGW128,
28237 IX86_BUILTIN_PCMPEQB128,
28238 IX86_BUILTIN_PCMPEQW128,
28239 IX86_BUILTIN_PCMPEQD128,
28240 IX86_BUILTIN_PCMPGTB128,
28241 IX86_BUILTIN_PCMPGTW128,
28242 IX86_BUILTIN_PCMPGTD128,
28244 IX86_BUILTIN_PMADDWD128,
28246 IX86_BUILTIN_PMAXSW128,
28247 IX86_BUILTIN_PMAXUB128,
28248 IX86_BUILTIN_PMINSW128,
28249 IX86_BUILTIN_PMINUB128,
28251 IX86_BUILTIN_PMULUDQ,
28252 IX86_BUILTIN_PMULUDQ128,
28253 IX86_BUILTIN_PMULHUW128,
28254 IX86_BUILTIN_PMULHW128,
28255 IX86_BUILTIN_PMULLW128,
28257 IX86_BUILTIN_PSADBW128,
28258 IX86_BUILTIN_PSHUFHW,
28259 IX86_BUILTIN_PSHUFLW,
28260 IX86_BUILTIN_PSHUFD,
28262 IX86_BUILTIN_PSLLDQI128,
28263 IX86_BUILTIN_PSLLWI128,
28264 IX86_BUILTIN_PSLLDI128,
28265 IX86_BUILTIN_PSLLQI128,
28266 IX86_BUILTIN_PSRAWI128,
28267 IX86_BUILTIN_PSRADI128,
28268 IX86_BUILTIN_PSRLDQI128,
28269 IX86_BUILTIN_PSRLWI128,
28270 IX86_BUILTIN_PSRLDI128,
28271 IX86_BUILTIN_PSRLQI128,
28273 IX86_BUILTIN_PSLLDQ128,
28274 IX86_BUILTIN_PSLLW128,
28275 IX86_BUILTIN_PSLLD128,
28276 IX86_BUILTIN_PSLLQ128,
28277 IX86_BUILTIN_PSRAW128,
28278 IX86_BUILTIN_PSRAD128,
28279 IX86_BUILTIN_PSRLW128,
28280 IX86_BUILTIN_PSRLD128,
28281 IX86_BUILTIN_PSRLQ128,
28283 IX86_BUILTIN_PUNPCKHBW128,
28284 IX86_BUILTIN_PUNPCKHWD128,
28285 IX86_BUILTIN_PUNPCKHDQ128,
28286 IX86_BUILTIN_PUNPCKHQDQ128,
28287 IX86_BUILTIN_PUNPCKLBW128,
28288 IX86_BUILTIN_PUNPCKLWD128,
28289 IX86_BUILTIN_PUNPCKLDQ128,
28290 IX86_BUILTIN_PUNPCKLQDQ128,
28292 IX86_BUILTIN_CLFLUSH,
28293 IX86_BUILTIN_MFENCE,
28294 IX86_BUILTIN_LFENCE,
28295 IX86_BUILTIN_PAUSE,
28297 IX86_BUILTIN_FNSTENV,
28298 IX86_BUILTIN_FLDENV,
28299 IX86_BUILTIN_FNSTSW,
28300 IX86_BUILTIN_FNCLEX,
28302 IX86_BUILTIN_BSRSI,
28303 IX86_BUILTIN_BSRDI,
28304 IX86_BUILTIN_RDPMC,
28305 IX86_BUILTIN_RDTSC,
28306 IX86_BUILTIN_RDTSCP,
28307 IX86_BUILTIN_ROLQI,
28308 IX86_BUILTIN_ROLHI,
28309 IX86_BUILTIN_RORQI,
28310 IX86_BUILTIN_RORHI,
28313 IX86_BUILTIN_ADDSUBPS,
28314 IX86_BUILTIN_HADDPS,
28315 IX86_BUILTIN_HSUBPS,
28316 IX86_BUILTIN_MOVSHDUP,
28317 IX86_BUILTIN_MOVSLDUP,
28318 IX86_BUILTIN_ADDSUBPD,
28319 IX86_BUILTIN_HADDPD,
28320 IX86_BUILTIN_HSUBPD,
28321 IX86_BUILTIN_LDDQU,
28323 IX86_BUILTIN_MONITOR,
28324 IX86_BUILTIN_MWAIT,
28327 IX86_BUILTIN_PHADDW,
28328 IX86_BUILTIN_PHADDD,
28329 IX86_BUILTIN_PHADDSW,
28330 IX86_BUILTIN_PHSUBW,
28331 IX86_BUILTIN_PHSUBD,
28332 IX86_BUILTIN_PHSUBSW,
28333 IX86_BUILTIN_PMADDUBSW,
28334 IX86_BUILTIN_PMULHRSW,
28335 IX86_BUILTIN_PSHUFB,
28336 IX86_BUILTIN_PSIGNB,
28337 IX86_BUILTIN_PSIGNW,
28338 IX86_BUILTIN_PSIGND,
28339 IX86_BUILTIN_PALIGNR,
28340 IX86_BUILTIN_PABSB,
28341 IX86_BUILTIN_PABSW,
28342 IX86_BUILTIN_PABSD,
28344 IX86_BUILTIN_PHADDW128,
28345 IX86_BUILTIN_PHADDD128,
28346 IX86_BUILTIN_PHADDSW128,
28347 IX86_BUILTIN_PHSUBW128,
28348 IX86_BUILTIN_PHSUBD128,
28349 IX86_BUILTIN_PHSUBSW128,
28350 IX86_BUILTIN_PMADDUBSW128,
28351 IX86_BUILTIN_PMULHRSW128,
28352 IX86_BUILTIN_PSHUFB128,
28353 IX86_BUILTIN_PSIGNB128,
28354 IX86_BUILTIN_PSIGNW128,
28355 IX86_BUILTIN_PSIGND128,
28356 IX86_BUILTIN_PALIGNR128,
28357 IX86_BUILTIN_PABSB128,
28358 IX86_BUILTIN_PABSW128,
28359 IX86_BUILTIN_PABSD128,
28361 /* AMDFAM10 - SSE4A New Instructions. */
28362 IX86_BUILTIN_MOVNTSD,
28363 IX86_BUILTIN_MOVNTSS,
28364 IX86_BUILTIN_EXTRQI,
28365 IX86_BUILTIN_EXTRQ,
28366 IX86_BUILTIN_INSERTQI,
28367 IX86_BUILTIN_INSERTQ,
28370 IX86_BUILTIN_BLENDPD,
28371 IX86_BUILTIN_BLENDPS,
28372 IX86_BUILTIN_BLENDVPD,
28373 IX86_BUILTIN_BLENDVPS,
28374 IX86_BUILTIN_PBLENDVB128,
28375 IX86_BUILTIN_PBLENDW128,
28380 IX86_BUILTIN_INSERTPS128,
28382 IX86_BUILTIN_MOVNTDQA,
28383 IX86_BUILTIN_MPSADBW128,
28384 IX86_BUILTIN_PACKUSDW128,
28385 IX86_BUILTIN_PCMPEQQ,
28386 IX86_BUILTIN_PHMINPOSUW128,
28388 IX86_BUILTIN_PMAXSB128,
28389 IX86_BUILTIN_PMAXSD128,
28390 IX86_BUILTIN_PMAXUD128,
28391 IX86_BUILTIN_PMAXUW128,
28393 IX86_BUILTIN_PMINSB128,
28394 IX86_BUILTIN_PMINSD128,
28395 IX86_BUILTIN_PMINUD128,
28396 IX86_BUILTIN_PMINUW128,
28398 IX86_BUILTIN_PMOVSXBW128,
28399 IX86_BUILTIN_PMOVSXBD128,
28400 IX86_BUILTIN_PMOVSXBQ128,
28401 IX86_BUILTIN_PMOVSXWD128,
28402 IX86_BUILTIN_PMOVSXWQ128,
28403 IX86_BUILTIN_PMOVSXDQ128,
28405 IX86_BUILTIN_PMOVZXBW128,
28406 IX86_BUILTIN_PMOVZXBD128,
28407 IX86_BUILTIN_PMOVZXBQ128,
28408 IX86_BUILTIN_PMOVZXWD128,
28409 IX86_BUILTIN_PMOVZXWQ128,
28410 IX86_BUILTIN_PMOVZXDQ128,
28412 IX86_BUILTIN_PMULDQ128,
28413 IX86_BUILTIN_PMULLD128,
28415 IX86_BUILTIN_ROUNDSD,
28416 IX86_BUILTIN_ROUNDSS,
28418 IX86_BUILTIN_ROUNDPD,
28419 IX86_BUILTIN_ROUNDPS,
28421 IX86_BUILTIN_FLOORPD,
28422 IX86_BUILTIN_CEILPD,
28423 IX86_BUILTIN_TRUNCPD,
28424 IX86_BUILTIN_RINTPD,
28425 IX86_BUILTIN_ROUNDPD_AZ,
28427 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
28428 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
28429 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
28431 IX86_BUILTIN_FLOORPS,
28432 IX86_BUILTIN_CEILPS,
28433 IX86_BUILTIN_TRUNCPS,
28434 IX86_BUILTIN_RINTPS,
28435 IX86_BUILTIN_ROUNDPS_AZ,
28437 IX86_BUILTIN_FLOORPS_SFIX,
28438 IX86_BUILTIN_CEILPS_SFIX,
28439 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
28441 IX86_BUILTIN_PTESTZ,
28442 IX86_BUILTIN_PTESTC,
28443 IX86_BUILTIN_PTESTNZC,
28445 IX86_BUILTIN_VEC_INIT_V2SI,
28446 IX86_BUILTIN_VEC_INIT_V4HI,
28447 IX86_BUILTIN_VEC_INIT_V8QI,
28448 IX86_BUILTIN_VEC_EXT_V2DF,
28449 IX86_BUILTIN_VEC_EXT_V2DI,
28450 IX86_BUILTIN_VEC_EXT_V4SF,
28451 IX86_BUILTIN_VEC_EXT_V4SI,
28452 IX86_BUILTIN_VEC_EXT_V8HI,
28453 IX86_BUILTIN_VEC_EXT_V2SI,
28454 IX86_BUILTIN_VEC_EXT_V4HI,
28455 IX86_BUILTIN_VEC_EXT_V16QI,
28456 IX86_BUILTIN_VEC_SET_V2DI,
28457 IX86_BUILTIN_VEC_SET_V4SF,
28458 IX86_BUILTIN_VEC_SET_V4SI,
28459 IX86_BUILTIN_VEC_SET_V8HI,
28460 IX86_BUILTIN_VEC_SET_V4HI,
28461 IX86_BUILTIN_VEC_SET_V16QI,
28463 IX86_BUILTIN_VEC_PACK_SFIX,
28464 IX86_BUILTIN_VEC_PACK_SFIX256,
28467 IX86_BUILTIN_CRC32QI,
28468 IX86_BUILTIN_CRC32HI,
28469 IX86_BUILTIN_CRC32SI,
28470 IX86_BUILTIN_CRC32DI,
28472 IX86_BUILTIN_PCMPESTRI128,
28473 IX86_BUILTIN_PCMPESTRM128,
28474 IX86_BUILTIN_PCMPESTRA128,
28475 IX86_BUILTIN_PCMPESTRC128,
28476 IX86_BUILTIN_PCMPESTRO128,
28477 IX86_BUILTIN_PCMPESTRS128,
28478 IX86_BUILTIN_PCMPESTRZ128,
28479 IX86_BUILTIN_PCMPISTRI128,
28480 IX86_BUILTIN_PCMPISTRM128,
28481 IX86_BUILTIN_PCMPISTRA128,
28482 IX86_BUILTIN_PCMPISTRC128,
28483 IX86_BUILTIN_PCMPISTRO128,
28484 IX86_BUILTIN_PCMPISTRS128,
28485 IX86_BUILTIN_PCMPISTRZ128,
28487 IX86_BUILTIN_PCMPGTQ,
28489 /* AES instructions */
28490 IX86_BUILTIN_AESENC128,
28491 IX86_BUILTIN_AESENCLAST128,
28492 IX86_BUILTIN_AESDEC128,
28493 IX86_BUILTIN_AESDECLAST128,
28494 IX86_BUILTIN_AESIMC128,
28495 IX86_BUILTIN_AESKEYGENASSIST128,
28497 /* PCLMUL instruction */
28498 IX86_BUILTIN_PCLMULQDQ128,
28501 IX86_BUILTIN_ADDPD256,
28502 IX86_BUILTIN_ADDPS256,
28503 IX86_BUILTIN_ADDSUBPD256,
28504 IX86_BUILTIN_ADDSUBPS256,
28505 IX86_BUILTIN_ANDPD256,
28506 IX86_BUILTIN_ANDPS256,
28507 IX86_BUILTIN_ANDNPD256,
28508 IX86_BUILTIN_ANDNPS256,
28509 IX86_BUILTIN_BLENDPD256,
28510 IX86_BUILTIN_BLENDPS256,
28511 IX86_BUILTIN_BLENDVPD256,
28512 IX86_BUILTIN_BLENDVPS256,
28513 IX86_BUILTIN_DIVPD256,
28514 IX86_BUILTIN_DIVPS256,
28515 IX86_BUILTIN_DPPS256,
28516 IX86_BUILTIN_HADDPD256,
28517 IX86_BUILTIN_HADDPS256,
28518 IX86_BUILTIN_HSUBPD256,
28519 IX86_BUILTIN_HSUBPS256,
28520 IX86_BUILTIN_MAXPD256,
28521 IX86_BUILTIN_MAXPS256,
28522 IX86_BUILTIN_MINPD256,
28523 IX86_BUILTIN_MINPS256,
28524 IX86_BUILTIN_MULPD256,
28525 IX86_BUILTIN_MULPS256,
28526 IX86_BUILTIN_ORPD256,
28527 IX86_BUILTIN_ORPS256,
28528 IX86_BUILTIN_SHUFPD256,
28529 IX86_BUILTIN_SHUFPS256,
28530 IX86_BUILTIN_SUBPD256,
28531 IX86_BUILTIN_SUBPS256,
28532 IX86_BUILTIN_XORPD256,
28533 IX86_BUILTIN_XORPS256,
28534 IX86_BUILTIN_CMPSD,
28535 IX86_BUILTIN_CMPSS,
28536 IX86_BUILTIN_CMPPD,
28537 IX86_BUILTIN_CMPPS,
28538 IX86_BUILTIN_CMPPD256,
28539 IX86_BUILTIN_CMPPS256,
28540 IX86_BUILTIN_CVTDQ2PD256,
28541 IX86_BUILTIN_CVTDQ2PS256,
28542 IX86_BUILTIN_CVTPD2PS256,
28543 IX86_BUILTIN_CVTPS2DQ256,
28544 IX86_BUILTIN_CVTPS2PD256,
28545 IX86_BUILTIN_CVTTPD2DQ256,
28546 IX86_BUILTIN_CVTPD2DQ256,
28547 IX86_BUILTIN_CVTTPS2DQ256,
28548 IX86_BUILTIN_EXTRACTF128PD256,
28549 IX86_BUILTIN_EXTRACTF128PS256,
28550 IX86_BUILTIN_EXTRACTF128SI256,
28551 IX86_BUILTIN_VZEROALL,
28552 IX86_BUILTIN_VZEROUPPER,
28553 IX86_BUILTIN_VPERMILVARPD,
28554 IX86_BUILTIN_VPERMILVARPS,
28555 IX86_BUILTIN_VPERMILVARPD256,
28556 IX86_BUILTIN_VPERMILVARPS256,
28557 IX86_BUILTIN_VPERMILPD,
28558 IX86_BUILTIN_VPERMILPS,
28559 IX86_BUILTIN_VPERMILPD256,
28560 IX86_BUILTIN_VPERMILPS256,
28561 IX86_BUILTIN_VPERMIL2PD,
28562 IX86_BUILTIN_VPERMIL2PS,
28563 IX86_BUILTIN_VPERMIL2PD256,
28564 IX86_BUILTIN_VPERMIL2PS256,
28565 IX86_BUILTIN_VPERM2F128PD256,
28566 IX86_BUILTIN_VPERM2F128PS256,
28567 IX86_BUILTIN_VPERM2F128SI256,
28568 IX86_BUILTIN_VBROADCASTSS,
28569 IX86_BUILTIN_VBROADCASTSD256,
28570 IX86_BUILTIN_VBROADCASTSS256,
28571 IX86_BUILTIN_VBROADCASTPD256,
28572 IX86_BUILTIN_VBROADCASTPS256,
28573 IX86_BUILTIN_VINSERTF128PD256,
28574 IX86_BUILTIN_VINSERTF128PS256,
28575 IX86_BUILTIN_VINSERTF128SI256,
28576 IX86_BUILTIN_LOADUPD256,
28577 IX86_BUILTIN_LOADUPS256,
28578 IX86_BUILTIN_STOREUPD256,
28579 IX86_BUILTIN_STOREUPS256,
28580 IX86_BUILTIN_LDDQU256,
28581 IX86_BUILTIN_MOVNTDQ256,
28582 IX86_BUILTIN_MOVNTPD256,
28583 IX86_BUILTIN_MOVNTPS256,
28584 IX86_BUILTIN_LOADDQU256,
28585 IX86_BUILTIN_STOREDQU256,
28586 IX86_BUILTIN_MASKLOADPD,
28587 IX86_BUILTIN_MASKLOADPS,
28588 IX86_BUILTIN_MASKSTOREPD,
28589 IX86_BUILTIN_MASKSTOREPS,
28590 IX86_BUILTIN_MASKLOADPD256,
28591 IX86_BUILTIN_MASKLOADPS256,
28592 IX86_BUILTIN_MASKSTOREPD256,
28593 IX86_BUILTIN_MASKSTOREPS256,
28594 IX86_BUILTIN_MOVSHDUP256,
28595 IX86_BUILTIN_MOVSLDUP256,
28596 IX86_BUILTIN_MOVDDUP256,
28598 IX86_BUILTIN_SQRTPD256,
28599 IX86_BUILTIN_SQRTPS256,
28600 IX86_BUILTIN_SQRTPS_NR256,
28601 IX86_BUILTIN_RSQRTPS256,
28602 IX86_BUILTIN_RSQRTPS_NR256,
28604 IX86_BUILTIN_RCPPS256,
28606 IX86_BUILTIN_ROUNDPD256,
28607 IX86_BUILTIN_ROUNDPS256,
28609 IX86_BUILTIN_FLOORPD256,
28610 IX86_BUILTIN_CEILPD256,
28611 IX86_BUILTIN_TRUNCPD256,
28612 IX86_BUILTIN_RINTPD256,
28613 IX86_BUILTIN_ROUNDPD_AZ256,
28615 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28616 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28617 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28619 IX86_BUILTIN_FLOORPS256,
28620 IX86_BUILTIN_CEILPS256,
28621 IX86_BUILTIN_TRUNCPS256,
28622 IX86_BUILTIN_RINTPS256,
28623 IX86_BUILTIN_ROUNDPS_AZ256,
28625 IX86_BUILTIN_FLOORPS_SFIX256,
28626 IX86_BUILTIN_CEILPS_SFIX256,
28627 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28629 IX86_BUILTIN_UNPCKHPD256,
28630 IX86_BUILTIN_UNPCKLPD256,
28631 IX86_BUILTIN_UNPCKHPS256,
28632 IX86_BUILTIN_UNPCKLPS256,
28634 IX86_BUILTIN_SI256_SI,
28635 IX86_BUILTIN_PS256_PS,
28636 IX86_BUILTIN_PD256_PD,
28637 IX86_BUILTIN_SI_SI256,
28638 IX86_BUILTIN_PS_PS256,
28639 IX86_BUILTIN_PD_PD256,
28641 IX86_BUILTIN_VTESTZPD,
28642 IX86_BUILTIN_VTESTCPD,
28643 IX86_BUILTIN_VTESTNZCPD,
28644 IX86_BUILTIN_VTESTZPS,
28645 IX86_BUILTIN_VTESTCPS,
28646 IX86_BUILTIN_VTESTNZCPS,
28647 IX86_BUILTIN_VTESTZPD256,
28648 IX86_BUILTIN_VTESTCPD256,
28649 IX86_BUILTIN_VTESTNZCPD256,
28650 IX86_BUILTIN_VTESTZPS256,
28651 IX86_BUILTIN_VTESTCPS256,
28652 IX86_BUILTIN_VTESTNZCPS256,
28653 IX86_BUILTIN_PTESTZ256,
28654 IX86_BUILTIN_PTESTC256,
28655 IX86_BUILTIN_PTESTNZC256,
28657 IX86_BUILTIN_MOVMSKPD256,
28658 IX86_BUILTIN_MOVMSKPS256,
28661 IX86_BUILTIN_MPSADBW256,
28662 IX86_BUILTIN_PABSB256,
28663 IX86_BUILTIN_PABSW256,
28664 IX86_BUILTIN_PABSD256,
28665 IX86_BUILTIN_PACKSSDW256,
28666 IX86_BUILTIN_PACKSSWB256,
28667 IX86_BUILTIN_PACKUSDW256,
28668 IX86_BUILTIN_PACKUSWB256,
28669 IX86_BUILTIN_PADDB256,
28670 IX86_BUILTIN_PADDW256,
28671 IX86_BUILTIN_PADDD256,
28672 IX86_BUILTIN_PADDQ256,
28673 IX86_BUILTIN_PADDSB256,
28674 IX86_BUILTIN_PADDSW256,
28675 IX86_BUILTIN_PADDUSB256,
28676 IX86_BUILTIN_PADDUSW256,
28677 IX86_BUILTIN_PALIGNR256,
28678 IX86_BUILTIN_AND256I,
28679 IX86_BUILTIN_ANDNOT256I,
28680 IX86_BUILTIN_PAVGB256,
28681 IX86_BUILTIN_PAVGW256,
28682 IX86_BUILTIN_PBLENDVB256,
28683 IX86_BUILTIN_PBLENDVW256,
28684 IX86_BUILTIN_PCMPEQB256,
28685 IX86_BUILTIN_PCMPEQW256,
28686 IX86_BUILTIN_PCMPEQD256,
28687 IX86_BUILTIN_PCMPEQQ256,
28688 IX86_BUILTIN_PCMPGTB256,
28689 IX86_BUILTIN_PCMPGTW256,
28690 IX86_BUILTIN_PCMPGTD256,
28691 IX86_BUILTIN_PCMPGTQ256,
28692 IX86_BUILTIN_PHADDW256,
28693 IX86_BUILTIN_PHADDD256,
28694 IX86_BUILTIN_PHADDSW256,
28695 IX86_BUILTIN_PHSUBW256,
28696 IX86_BUILTIN_PHSUBD256,
28697 IX86_BUILTIN_PHSUBSW256,
28698 IX86_BUILTIN_PMADDUBSW256,
28699 IX86_BUILTIN_PMADDWD256,
28700 IX86_BUILTIN_PMAXSB256,
28701 IX86_BUILTIN_PMAXSW256,
28702 IX86_BUILTIN_PMAXSD256,
28703 IX86_BUILTIN_PMAXUB256,
28704 IX86_BUILTIN_PMAXUW256,
28705 IX86_BUILTIN_PMAXUD256,
28706 IX86_BUILTIN_PMINSB256,
28707 IX86_BUILTIN_PMINSW256,
28708 IX86_BUILTIN_PMINSD256,
28709 IX86_BUILTIN_PMINUB256,
28710 IX86_BUILTIN_PMINUW256,
28711 IX86_BUILTIN_PMINUD256,
28712 IX86_BUILTIN_PMOVMSKB256,
28713 IX86_BUILTIN_PMOVSXBW256,
28714 IX86_BUILTIN_PMOVSXBD256,
28715 IX86_BUILTIN_PMOVSXBQ256,
28716 IX86_BUILTIN_PMOVSXWD256,
28717 IX86_BUILTIN_PMOVSXWQ256,
28718 IX86_BUILTIN_PMOVSXDQ256,
28719 IX86_BUILTIN_PMOVZXBW256,
28720 IX86_BUILTIN_PMOVZXBD256,
28721 IX86_BUILTIN_PMOVZXBQ256,
28722 IX86_BUILTIN_PMOVZXWD256,
28723 IX86_BUILTIN_PMOVZXWQ256,
28724 IX86_BUILTIN_PMOVZXDQ256,
28725 IX86_BUILTIN_PMULDQ256,
28726 IX86_BUILTIN_PMULHRSW256,
28727 IX86_BUILTIN_PMULHUW256,
28728 IX86_BUILTIN_PMULHW256,
28729 IX86_BUILTIN_PMULLW256,
28730 IX86_BUILTIN_PMULLD256,
28731 IX86_BUILTIN_PMULUDQ256,
28732 IX86_BUILTIN_POR256,
28733 IX86_BUILTIN_PSADBW256,
28734 IX86_BUILTIN_PSHUFB256,
28735 IX86_BUILTIN_PSHUFD256,
28736 IX86_BUILTIN_PSHUFHW256,
28737 IX86_BUILTIN_PSHUFLW256,
28738 IX86_BUILTIN_PSIGNB256,
28739 IX86_BUILTIN_PSIGNW256,
28740 IX86_BUILTIN_PSIGND256,
28741 IX86_BUILTIN_PSLLDQI256,
28742 IX86_BUILTIN_PSLLWI256,
28743 IX86_BUILTIN_PSLLW256,
28744 IX86_BUILTIN_PSLLDI256,
28745 IX86_BUILTIN_PSLLD256,
28746 IX86_BUILTIN_PSLLQI256,
28747 IX86_BUILTIN_PSLLQ256,
28748 IX86_BUILTIN_PSRAWI256,
28749 IX86_BUILTIN_PSRAW256,
28750 IX86_BUILTIN_PSRADI256,
28751 IX86_BUILTIN_PSRAD256,
28752 IX86_BUILTIN_PSRLDQI256,
28753 IX86_BUILTIN_PSRLWI256,
28754 IX86_BUILTIN_PSRLW256,
28755 IX86_BUILTIN_PSRLDI256,
28756 IX86_BUILTIN_PSRLD256,
28757 IX86_BUILTIN_PSRLQI256,
28758 IX86_BUILTIN_PSRLQ256,
28759 IX86_BUILTIN_PSUBB256,
28760 IX86_BUILTIN_PSUBW256,
28761 IX86_BUILTIN_PSUBD256,
28762 IX86_BUILTIN_PSUBQ256,
28763 IX86_BUILTIN_PSUBSB256,
28764 IX86_BUILTIN_PSUBSW256,
28765 IX86_BUILTIN_PSUBUSB256,
28766 IX86_BUILTIN_PSUBUSW256,
28767 IX86_BUILTIN_PUNPCKHBW256,
28768 IX86_BUILTIN_PUNPCKHWD256,
28769 IX86_BUILTIN_PUNPCKHDQ256,
28770 IX86_BUILTIN_PUNPCKHQDQ256,
28771 IX86_BUILTIN_PUNPCKLBW256,
28772 IX86_BUILTIN_PUNPCKLWD256,
28773 IX86_BUILTIN_PUNPCKLDQ256,
28774 IX86_BUILTIN_PUNPCKLQDQ256,
28775 IX86_BUILTIN_PXOR256,
28776 IX86_BUILTIN_MOVNTDQA256,
28777 IX86_BUILTIN_VBROADCASTSS_PS,
28778 IX86_BUILTIN_VBROADCASTSS_PS256,
28779 IX86_BUILTIN_VBROADCASTSD_PD256,
28780 IX86_BUILTIN_VBROADCASTSI256,
28781 IX86_BUILTIN_PBLENDD256,
28782 IX86_BUILTIN_PBLENDD128,
28783 IX86_BUILTIN_PBROADCASTB256,
28784 IX86_BUILTIN_PBROADCASTW256,
28785 IX86_BUILTIN_PBROADCASTD256,
28786 IX86_BUILTIN_PBROADCASTQ256,
28787 IX86_BUILTIN_PBROADCASTB128,
28788 IX86_BUILTIN_PBROADCASTW128,
28789 IX86_BUILTIN_PBROADCASTD128,
28790 IX86_BUILTIN_PBROADCASTQ128,
28791 IX86_BUILTIN_VPERMVARSI256,
28792 IX86_BUILTIN_VPERMDF256,
28793 IX86_BUILTIN_VPERMVARSF256,
28794 IX86_BUILTIN_VPERMDI256,
28795 IX86_BUILTIN_VPERMTI256,
28796 IX86_BUILTIN_VEXTRACT128I256,
28797 IX86_BUILTIN_VINSERT128I256,
28798 IX86_BUILTIN_MASKLOADD,
28799 IX86_BUILTIN_MASKLOADQ,
28800 IX86_BUILTIN_MASKLOADD256,
28801 IX86_BUILTIN_MASKLOADQ256,
28802 IX86_BUILTIN_MASKSTORED,
28803 IX86_BUILTIN_MASKSTOREQ,
28804 IX86_BUILTIN_MASKSTORED256,
28805 IX86_BUILTIN_MASKSTOREQ256,
28806 IX86_BUILTIN_PSLLVV4DI,
28807 IX86_BUILTIN_PSLLVV2DI,
28808 IX86_BUILTIN_PSLLVV8SI,
28809 IX86_BUILTIN_PSLLVV4SI,
28810 IX86_BUILTIN_PSRAVV8SI,
28811 IX86_BUILTIN_PSRAVV4SI,
28812 IX86_BUILTIN_PSRLVV4DI,
28813 IX86_BUILTIN_PSRLVV2DI,
28814 IX86_BUILTIN_PSRLVV8SI,
28815 IX86_BUILTIN_PSRLVV4SI,
28817 IX86_BUILTIN_GATHERSIV2DF,
28818 IX86_BUILTIN_GATHERSIV4DF,
28819 IX86_BUILTIN_GATHERDIV2DF,
28820 IX86_BUILTIN_GATHERDIV4DF,
28821 IX86_BUILTIN_GATHERSIV4SF,
28822 IX86_BUILTIN_GATHERSIV8SF,
28823 IX86_BUILTIN_GATHERDIV4SF,
28824 IX86_BUILTIN_GATHERDIV8SF,
28825 IX86_BUILTIN_GATHERSIV2DI,
28826 IX86_BUILTIN_GATHERSIV4DI,
28827 IX86_BUILTIN_GATHERDIV2DI,
28828 IX86_BUILTIN_GATHERDIV4DI,
28829 IX86_BUILTIN_GATHERSIV4SI,
28830 IX86_BUILTIN_GATHERSIV8SI,
28831 IX86_BUILTIN_GATHERDIV4SI,
28832 IX86_BUILTIN_GATHERDIV8SI,
28835 IX86_BUILTIN_SI512_SI256,
28836 IX86_BUILTIN_PD512_PD256,
28837 IX86_BUILTIN_PS512_PS256,
28838 IX86_BUILTIN_SI512_SI,
28839 IX86_BUILTIN_PD512_PD,
28840 IX86_BUILTIN_PS512_PS,
28841 IX86_BUILTIN_ADDPD512,
28842 IX86_BUILTIN_ADDPS512,
28843 IX86_BUILTIN_ADDSD_ROUND,
28844 IX86_BUILTIN_ADDSS_ROUND,
28845 IX86_BUILTIN_ALIGND512,
28846 IX86_BUILTIN_ALIGNQ512,
28847 IX86_BUILTIN_BLENDMD512,
28848 IX86_BUILTIN_BLENDMPD512,
28849 IX86_BUILTIN_BLENDMPS512,
28850 IX86_BUILTIN_BLENDMQ512,
28851 IX86_BUILTIN_BROADCASTF32X4_512,
28852 IX86_BUILTIN_BROADCASTF64X4_512,
28853 IX86_BUILTIN_BROADCASTI32X4_512,
28854 IX86_BUILTIN_BROADCASTI64X4_512,
28855 IX86_BUILTIN_BROADCASTSD512,
28856 IX86_BUILTIN_BROADCASTSS512,
28857 IX86_BUILTIN_CMPD512,
28858 IX86_BUILTIN_CMPPD512,
28859 IX86_BUILTIN_CMPPS512,
28860 IX86_BUILTIN_CMPQ512,
28861 IX86_BUILTIN_CMPSD_MASK,
28862 IX86_BUILTIN_CMPSS_MASK,
28863 IX86_BUILTIN_COMIDF,
28864 IX86_BUILTIN_COMISF,
28865 IX86_BUILTIN_COMPRESSPD512,
28866 IX86_BUILTIN_COMPRESSPDSTORE512,
28867 IX86_BUILTIN_COMPRESSPS512,
28868 IX86_BUILTIN_COMPRESSPSSTORE512,
28869 IX86_BUILTIN_CVTDQ2PD512,
28870 IX86_BUILTIN_CVTDQ2PS512,
28871 IX86_BUILTIN_CVTPD2DQ512,
28872 IX86_BUILTIN_CVTPD2PS512,
28873 IX86_BUILTIN_CVTPD2UDQ512,
28874 IX86_BUILTIN_CVTPH2PS512,
28875 IX86_BUILTIN_CVTPS2DQ512,
28876 IX86_BUILTIN_CVTPS2PD512,
28877 IX86_BUILTIN_CVTPS2PH512,
28878 IX86_BUILTIN_CVTPS2UDQ512,
28879 IX86_BUILTIN_CVTSD2SS_ROUND,
28880 IX86_BUILTIN_CVTSI2SD64,
28881 IX86_BUILTIN_CVTSI2SS32,
28882 IX86_BUILTIN_CVTSI2SS64,
28883 IX86_BUILTIN_CVTSS2SD_ROUND,
28884 IX86_BUILTIN_CVTTPD2DQ512,
28885 IX86_BUILTIN_CVTTPD2UDQ512,
28886 IX86_BUILTIN_CVTTPS2DQ512,
28887 IX86_BUILTIN_CVTTPS2UDQ512,
28888 IX86_BUILTIN_CVTUDQ2PD512,
28889 IX86_BUILTIN_CVTUDQ2PS512,
28890 IX86_BUILTIN_CVTUSI2SD32,
28891 IX86_BUILTIN_CVTUSI2SD64,
28892 IX86_BUILTIN_CVTUSI2SS32,
28893 IX86_BUILTIN_CVTUSI2SS64,
28894 IX86_BUILTIN_DIVPD512,
28895 IX86_BUILTIN_DIVPS512,
28896 IX86_BUILTIN_DIVSD_ROUND,
28897 IX86_BUILTIN_DIVSS_ROUND,
28898 IX86_BUILTIN_EXPANDPD512,
28899 IX86_BUILTIN_EXPANDPD512Z,
28900 IX86_BUILTIN_EXPANDPDLOAD512,
28901 IX86_BUILTIN_EXPANDPDLOAD512Z,
28902 IX86_BUILTIN_EXPANDPS512,
28903 IX86_BUILTIN_EXPANDPS512Z,
28904 IX86_BUILTIN_EXPANDPSLOAD512,
28905 IX86_BUILTIN_EXPANDPSLOAD512Z,
28906 IX86_BUILTIN_EXTRACTF32X4,
28907 IX86_BUILTIN_EXTRACTF64X4,
28908 IX86_BUILTIN_EXTRACTI32X4,
28909 IX86_BUILTIN_EXTRACTI64X4,
28910 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28911 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28912 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28913 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28914 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28915 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28916 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28917 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28918 IX86_BUILTIN_GETEXPPD512,
28919 IX86_BUILTIN_GETEXPPS512,
28920 IX86_BUILTIN_GETEXPSD128,
28921 IX86_BUILTIN_GETEXPSS128,
28922 IX86_BUILTIN_GETMANTPD512,
28923 IX86_BUILTIN_GETMANTPS512,
28924 IX86_BUILTIN_GETMANTSD128,
28925 IX86_BUILTIN_GETMANTSS128,
28926 IX86_BUILTIN_INSERTF32X4,
28927 IX86_BUILTIN_INSERTF64X4,
28928 IX86_BUILTIN_INSERTI32X4,
28929 IX86_BUILTIN_INSERTI64X4,
28930 IX86_BUILTIN_LOADAPD512,
28931 IX86_BUILTIN_LOADAPS512,
28932 IX86_BUILTIN_LOADDQUDI512,
28933 IX86_BUILTIN_LOADDQUSI512,
28934 IX86_BUILTIN_LOADUPD512,
28935 IX86_BUILTIN_LOADUPS512,
28936 IX86_BUILTIN_MAXPD512,
28937 IX86_BUILTIN_MAXPS512,
28938 IX86_BUILTIN_MAXSD_ROUND,
28939 IX86_BUILTIN_MAXSS_ROUND,
28940 IX86_BUILTIN_MINPD512,
28941 IX86_BUILTIN_MINPS512,
28942 IX86_BUILTIN_MINSD_ROUND,
28943 IX86_BUILTIN_MINSS_ROUND,
28944 IX86_BUILTIN_MOVAPD512,
28945 IX86_BUILTIN_MOVAPS512,
28946 IX86_BUILTIN_MOVDDUP512,
28947 IX86_BUILTIN_MOVDQA32LOAD512,
28948 IX86_BUILTIN_MOVDQA32STORE512,
28949 IX86_BUILTIN_MOVDQA32_512,
28950 IX86_BUILTIN_MOVDQA64LOAD512,
28951 IX86_BUILTIN_MOVDQA64STORE512,
28952 IX86_BUILTIN_MOVDQA64_512,
28953 IX86_BUILTIN_MOVNTDQ512,
28954 IX86_BUILTIN_MOVNTDQA512,
28955 IX86_BUILTIN_MOVNTPD512,
28956 IX86_BUILTIN_MOVNTPS512,
28957 IX86_BUILTIN_MOVSHDUP512,
28958 IX86_BUILTIN_MOVSLDUP512,
28959 IX86_BUILTIN_MULPD512,
28960 IX86_BUILTIN_MULPS512,
28961 IX86_BUILTIN_MULSD_ROUND,
28962 IX86_BUILTIN_MULSS_ROUND,
28963 IX86_BUILTIN_PABSD512,
28964 IX86_BUILTIN_PABSQ512,
28965 IX86_BUILTIN_PADDD512,
28966 IX86_BUILTIN_PADDQ512,
28967 IX86_BUILTIN_PANDD512,
28968 IX86_BUILTIN_PANDND512,
28969 IX86_BUILTIN_PANDNQ512,
28970 IX86_BUILTIN_PANDQ512,
28971 IX86_BUILTIN_PBROADCASTD512,
28972 IX86_BUILTIN_PBROADCASTD512_GPR,
28973 IX86_BUILTIN_PBROADCASTMB512,
28974 IX86_BUILTIN_PBROADCASTMW512,
28975 IX86_BUILTIN_PBROADCASTQ512,
28976 IX86_BUILTIN_PBROADCASTQ512_GPR,
28977 IX86_BUILTIN_PCMPEQD512_MASK,
28978 IX86_BUILTIN_PCMPEQQ512_MASK,
28979 IX86_BUILTIN_PCMPGTD512_MASK,
28980 IX86_BUILTIN_PCMPGTQ512_MASK,
28981 IX86_BUILTIN_PCOMPRESSD512,
28982 IX86_BUILTIN_PCOMPRESSDSTORE512,
28983 IX86_BUILTIN_PCOMPRESSQ512,
28984 IX86_BUILTIN_PCOMPRESSQSTORE512,
28985 IX86_BUILTIN_PEXPANDD512,
28986 IX86_BUILTIN_PEXPANDD512Z,
28987 IX86_BUILTIN_PEXPANDDLOAD512,
28988 IX86_BUILTIN_PEXPANDDLOAD512Z,
28989 IX86_BUILTIN_PEXPANDQ512,
28990 IX86_BUILTIN_PEXPANDQ512Z,
28991 IX86_BUILTIN_PEXPANDQLOAD512,
28992 IX86_BUILTIN_PEXPANDQLOAD512Z,
28993 IX86_BUILTIN_PMAXSD512,
28994 IX86_BUILTIN_PMAXSQ512,
28995 IX86_BUILTIN_PMAXUD512,
28996 IX86_BUILTIN_PMAXUQ512,
28997 IX86_BUILTIN_PMINSD512,
28998 IX86_BUILTIN_PMINSQ512,
28999 IX86_BUILTIN_PMINUD512,
29000 IX86_BUILTIN_PMINUQ512,
29001 IX86_BUILTIN_PMOVDB512,
29002 IX86_BUILTIN_PMOVDB512_MEM,
29003 IX86_BUILTIN_PMOVDW512,
29004 IX86_BUILTIN_PMOVDW512_MEM,
29005 IX86_BUILTIN_PMOVQB512,
29006 IX86_BUILTIN_PMOVQB512_MEM,
29007 IX86_BUILTIN_PMOVQD512,
29008 IX86_BUILTIN_PMOVQD512_MEM,
29009 IX86_BUILTIN_PMOVQW512,
29010 IX86_BUILTIN_PMOVQW512_MEM,
29011 IX86_BUILTIN_PMOVSDB512,
29012 IX86_BUILTIN_PMOVSDB512_MEM,
29013 IX86_BUILTIN_PMOVSDW512,
29014 IX86_BUILTIN_PMOVSDW512_MEM,
29015 IX86_BUILTIN_PMOVSQB512,
29016 IX86_BUILTIN_PMOVSQB512_MEM,
29017 IX86_BUILTIN_PMOVSQD512,
29018 IX86_BUILTIN_PMOVSQD512_MEM,
29019 IX86_BUILTIN_PMOVSQW512,
29020 IX86_BUILTIN_PMOVSQW512_MEM,
29021 IX86_BUILTIN_PMOVSXBD512,
29022 IX86_BUILTIN_PMOVSXBQ512,
29023 IX86_BUILTIN_PMOVSXDQ512,
29024 IX86_BUILTIN_PMOVSXWD512,
29025 IX86_BUILTIN_PMOVSXWQ512,
29026 IX86_BUILTIN_PMOVUSDB512,
29027 IX86_BUILTIN_PMOVUSDB512_MEM,
29028 IX86_BUILTIN_PMOVUSDW512,
29029 IX86_BUILTIN_PMOVUSDW512_MEM,
29030 IX86_BUILTIN_PMOVUSQB512,
29031 IX86_BUILTIN_PMOVUSQB512_MEM,
29032 IX86_BUILTIN_PMOVUSQD512,
29033 IX86_BUILTIN_PMOVUSQD512_MEM,
29034 IX86_BUILTIN_PMOVUSQW512,
29035 IX86_BUILTIN_PMOVUSQW512_MEM,
29036 IX86_BUILTIN_PMOVZXBD512,
29037 IX86_BUILTIN_PMOVZXBQ512,
29038 IX86_BUILTIN_PMOVZXDQ512,
29039 IX86_BUILTIN_PMOVZXWD512,
29040 IX86_BUILTIN_PMOVZXWQ512,
29041 IX86_BUILTIN_PMULDQ512,
29042 IX86_BUILTIN_PMULLD512,
29043 IX86_BUILTIN_PMULUDQ512,
29044 IX86_BUILTIN_PORD512,
29045 IX86_BUILTIN_PORQ512,
29046 IX86_BUILTIN_PROLD512,
29047 IX86_BUILTIN_PROLQ512,
29048 IX86_BUILTIN_PROLVD512,
29049 IX86_BUILTIN_PROLVQ512,
29050 IX86_BUILTIN_PRORD512,
29051 IX86_BUILTIN_PRORQ512,
29052 IX86_BUILTIN_PRORVD512,
29053 IX86_BUILTIN_PRORVQ512,
29054 IX86_BUILTIN_PSHUFD512,
29055 IX86_BUILTIN_PSLLD512,
29056 IX86_BUILTIN_PSLLDI512,
29057 IX86_BUILTIN_PSLLQ512,
29058 IX86_BUILTIN_PSLLQI512,
29059 IX86_BUILTIN_PSLLVV16SI,
29060 IX86_BUILTIN_PSLLVV8DI,
29061 IX86_BUILTIN_PSRAD512,
29062 IX86_BUILTIN_PSRADI512,
29063 IX86_BUILTIN_PSRAQ512,
29064 IX86_BUILTIN_PSRAQI512,
29065 IX86_BUILTIN_PSRAVV16SI,
29066 IX86_BUILTIN_PSRAVV8DI,
29067 IX86_BUILTIN_PSRLD512,
29068 IX86_BUILTIN_PSRLDI512,
29069 IX86_BUILTIN_PSRLQ512,
29070 IX86_BUILTIN_PSRLQI512,
29071 IX86_BUILTIN_PSRLVV16SI,
29072 IX86_BUILTIN_PSRLVV8DI,
29073 IX86_BUILTIN_PSUBD512,
29074 IX86_BUILTIN_PSUBQ512,
29075 IX86_BUILTIN_PTESTMD512,
29076 IX86_BUILTIN_PTESTMQ512,
29077 IX86_BUILTIN_PTESTNMD512,
29078 IX86_BUILTIN_PTESTNMQ512,
29079 IX86_BUILTIN_PUNPCKHDQ512,
29080 IX86_BUILTIN_PUNPCKHQDQ512,
29081 IX86_BUILTIN_PUNPCKLDQ512,
29082 IX86_BUILTIN_PUNPCKLQDQ512,
29083 IX86_BUILTIN_PXORD512,
29084 IX86_BUILTIN_PXORQ512,
29085 IX86_BUILTIN_RCP14PD512,
29086 IX86_BUILTIN_RCP14PS512,
29087 IX86_BUILTIN_RCP14SD,
29088 IX86_BUILTIN_RCP14SS,
29089 IX86_BUILTIN_RNDSCALEPD,
29090 IX86_BUILTIN_RNDSCALEPS,
29091 IX86_BUILTIN_RNDSCALESD,
29092 IX86_BUILTIN_RNDSCALESS,
29093 IX86_BUILTIN_RSQRT14PD512,
29094 IX86_BUILTIN_RSQRT14PS512,
29095 IX86_BUILTIN_RSQRT14SD,
29096 IX86_BUILTIN_RSQRT14SS,
29097 IX86_BUILTIN_SCALEFPD512,
29098 IX86_BUILTIN_SCALEFPS512,
29099 IX86_BUILTIN_SCALEFSD,
29100 IX86_BUILTIN_SCALEFSS,
29101 IX86_BUILTIN_SHUFPD512,
29102 IX86_BUILTIN_SHUFPS512,
29103 IX86_BUILTIN_SHUF_F32x4,
29104 IX86_BUILTIN_SHUF_F64x2,
29105 IX86_BUILTIN_SHUF_I32x4,
29106 IX86_BUILTIN_SHUF_I64x2,
29107 IX86_BUILTIN_SQRTPD512,
29108 IX86_BUILTIN_SQRTPD512_MASK,
29109 IX86_BUILTIN_SQRTPS512_MASK,
29110 IX86_BUILTIN_SQRTPS_NR512,
29111 IX86_BUILTIN_SQRTSD_ROUND,
29112 IX86_BUILTIN_SQRTSS_ROUND,
29113 IX86_BUILTIN_STOREAPD512,
29114 IX86_BUILTIN_STOREAPS512,
29115 IX86_BUILTIN_STOREDQUDI512,
29116 IX86_BUILTIN_STOREDQUSI512,
29117 IX86_BUILTIN_STOREUPD512,
29118 IX86_BUILTIN_STOREUPS512,
29119 IX86_BUILTIN_SUBPD512,
29120 IX86_BUILTIN_SUBPS512,
29121 IX86_BUILTIN_SUBSD_ROUND,
29122 IX86_BUILTIN_SUBSS_ROUND,
29123 IX86_BUILTIN_UCMPD512,
29124 IX86_BUILTIN_UCMPQ512,
29125 IX86_BUILTIN_UNPCKHPD512,
29126 IX86_BUILTIN_UNPCKHPS512,
29127 IX86_BUILTIN_UNPCKLPD512,
29128 IX86_BUILTIN_UNPCKLPS512,
29129 IX86_BUILTIN_VCVTSD2SI32,
29130 IX86_BUILTIN_VCVTSD2SI64,
29131 IX86_BUILTIN_VCVTSD2USI32,
29132 IX86_BUILTIN_VCVTSD2USI64,
29133 IX86_BUILTIN_VCVTSS2SI32,
29134 IX86_BUILTIN_VCVTSS2SI64,
29135 IX86_BUILTIN_VCVTSS2USI32,
29136 IX86_BUILTIN_VCVTSS2USI64,
29137 IX86_BUILTIN_VCVTTSD2SI32,
29138 IX86_BUILTIN_VCVTTSD2SI64,
29139 IX86_BUILTIN_VCVTTSD2USI32,
29140 IX86_BUILTIN_VCVTTSD2USI64,
29141 IX86_BUILTIN_VCVTTSS2SI32,
29142 IX86_BUILTIN_VCVTTSS2SI64,
29143 IX86_BUILTIN_VCVTTSS2USI32,
29144 IX86_BUILTIN_VCVTTSS2USI64,
29145 IX86_BUILTIN_VFMADDPD512_MASK,
29146 IX86_BUILTIN_VFMADDPD512_MASK3,
29147 IX86_BUILTIN_VFMADDPD512_MASKZ,
29148 IX86_BUILTIN_VFMADDPS512_MASK,
29149 IX86_BUILTIN_VFMADDPS512_MASK3,
29150 IX86_BUILTIN_VFMADDPS512_MASKZ,
29151 IX86_BUILTIN_VFMADDSD3_ROUND,
29152 IX86_BUILTIN_VFMADDSS3_ROUND,
29153 IX86_BUILTIN_VFMADDSUBPD512_MASK,
29154 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
29155 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
29156 IX86_BUILTIN_VFMADDSUBPS512_MASK,
29157 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
29158 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
29159 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
29160 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
29161 IX86_BUILTIN_VFMSUBPD512_MASK3,
29162 IX86_BUILTIN_VFMSUBPS512_MASK3,
29163 IX86_BUILTIN_VFMSUBSD3_MASK3,
29164 IX86_BUILTIN_VFMSUBSS3_MASK3,
29165 IX86_BUILTIN_VFNMADDPD512_MASK,
29166 IX86_BUILTIN_VFNMADDPS512_MASK,
29167 IX86_BUILTIN_VFNMSUBPD512_MASK,
29168 IX86_BUILTIN_VFNMSUBPD512_MASK3,
29169 IX86_BUILTIN_VFNMSUBPS512_MASK,
29170 IX86_BUILTIN_VFNMSUBPS512_MASK3,
29171 IX86_BUILTIN_VPCLZCNTD512,
29172 IX86_BUILTIN_VPCLZCNTQ512,
29173 IX86_BUILTIN_VPCONFLICTD512,
29174 IX86_BUILTIN_VPCONFLICTQ512,
29175 IX86_BUILTIN_VPERMDF512,
29176 IX86_BUILTIN_VPERMDI512,
29177 IX86_BUILTIN_VPERMI2VARD512,
29178 IX86_BUILTIN_VPERMI2VARPD512,
29179 IX86_BUILTIN_VPERMI2VARPS512,
29180 IX86_BUILTIN_VPERMI2VARQ512,
29181 IX86_BUILTIN_VPERMILPD512,
29182 IX86_BUILTIN_VPERMILPS512,
29183 IX86_BUILTIN_VPERMILVARPD512,
29184 IX86_BUILTIN_VPERMILVARPS512,
29185 IX86_BUILTIN_VPERMT2VARD512,
29186 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
29187 IX86_BUILTIN_VPERMT2VARPD512,
29188 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
29189 IX86_BUILTIN_VPERMT2VARPS512,
29190 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
29191 IX86_BUILTIN_VPERMT2VARQ512,
29192 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
29193 IX86_BUILTIN_VPERMVARDF512,
29194 IX86_BUILTIN_VPERMVARDI512,
29195 IX86_BUILTIN_VPERMVARSF512,
29196 IX86_BUILTIN_VPERMVARSI512,
29197 IX86_BUILTIN_VTERNLOGD512_MASK,
29198 IX86_BUILTIN_VTERNLOGD512_MASKZ,
29199 IX86_BUILTIN_VTERNLOGQ512_MASK,
29200 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
29202 /* Mask arithmetic operations.  */
29203 IX86_BUILTIN_KAND16,
29204 IX86_BUILTIN_KANDN16,
29205 IX86_BUILTIN_KNOT16,
29206 IX86_BUILTIN_KOR16,
29207 IX86_BUILTIN_KORTESTC16,
29208 IX86_BUILTIN_KORTESTZ16,
29209 IX86_BUILTIN_KUNPCKBW,
29210 IX86_BUILTIN_KXNOR16,
29211 IX86_BUILTIN_KXOR16,
29212 IX86_BUILTIN_KMOV16,
29215 IX86_BUILTIN_PMOVUSQD256_MEM,
29216 IX86_BUILTIN_PMOVUSQD128_MEM,
29217 IX86_BUILTIN_PMOVSQD256_MEM,
29218 IX86_BUILTIN_PMOVSQD128_MEM,
29219 IX86_BUILTIN_PMOVQD256_MEM,
29220 IX86_BUILTIN_PMOVQD128_MEM,
29221 IX86_BUILTIN_PMOVUSQW256_MEM,
29222 IX86_BUILTIN_PMOVUSQW128_MEM,
29223 IX86_BUILTIN_PMOVSQW256_MEM,
29224 IX86_BUILTIN_PMOVSQW128_MEM,
29225 IX86_BUILTIN_PMOVQW256_MEM,
29226 IX86_BUILTIN_PMOVQW128_MEM,
29227 IX86_BUILTIN_PMOVUSQB256_MEM,
29228 IX86_BUILTIN_PMOVUSQB128_MEM,
29229 IX86_BUILTIN_PMOVSQB256_MEM,
29230 IX86_BUILTIN_PMOVSQB128_MEM,
29231 IX86_BUILTIN_PMOVQB256_MEM,
29232 IX86_BUILTIN_PMOVQB128_MEM,
29233 IX86_BUILTIN_PMOVUSDW256_MEM,
29234 IX86_BUILTIN_PMOVUSDW128_MEM,
29235 IX86_BUILTIN_PMOVSDW256_MEM,
29236 IX86_BUILTIN_PMOVSDW128_MEM,
29237 IX86_BUILTIN_PMOVDW256_MEM,
29238 IX86_BUILTIN_PMOVDW128_MEM,
29239 IX86_BUILTIN_PMOVUSDB256_MEM,
29240 IX86_BUILTIN_PMOVUSDB128_MEM,
29241 IX86_BUILTIN_PMOVSDB256_MEM,
29242 IX86_BUILTIN_PMOVSDB128_MEM,
29243 IX86_BUILTIN_PMOVDB256_MEM,
29244 IX86_BUILTIN_PMOVDB128_MEM,
29245 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
29246 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
29247 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
29248 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
29249 IX86_BUILTIN_MOVDQA64STORE256_MASK,
29250 IX86_BUILTIN_MOVDQA64STORE128_MASK,
29251 IX86_BUILTIN_MOVDQA32STORE256_MASK,
29252 IX86_BUILTIN_MOVDQA32STORE128_MASK,
29253 IX86_BUILTIN_LOADAPD256_MASK,
29254 IX86_BUILTIN_LOADAPD128_MASK,
29255 IX86_BUILTIN_LOADAPS256_MASK,
29256 IX86_BUILTIN_LOADAPS128_MASK,
29257 IX86_BUILTIN_STOREAPD256_MASK,
29258 IX86_BUILTIN_STOREAPD128_MASK,
29259 IX86_BUILTIN_STOREAPS256_MASK,
29260 IX86_BUILTIN_STOREAPS128_MASK,
29261 IX86_BUILTIN_LOADUPD256_MASK,
29262 IX86_BUILTIN_LOADUPD128_MASK,
29263 IX86_BUILTIN_LOADUPS256_MASK,
29264 IX86_BUILTIN_LOADUPS128_MASK,
29265 IX86_BUILTIN_STOREUPD256_MASK,
29266 IX86_BUILTIN_STOREUPD128_MASK,
29267 IX86_BUILTIN_STOREUPS256_MASK,
29268 IX86_BUILTIN_STOREUPS128_MASK,
29269 IX86_BUILTIN_LOADDQUDI256_MASK,
29270 IX86_BUILTIN_LOADDQUDI128_MASK,
29271 IX86_BUILTIN_LOADDQUSI256_MASK,
29272 IX86_BUILTIN_LOADDQUSI128_MASK,
29273 IX86_BUILTIN_LOADDQUHI256_MASK,
29274 IX86_BUILTIN_LOADDQUHI128_MASK,
29275 IX86_BUILTIN_LOADDQUQI256_MASK,
29276 IX86_BUILTIN_LOADDQUQI128_MASK,
29277 IX86_BUILTIN_STOREDQUDI256_MASK,
29278 IX86_BUILTIN_STOREDQUDI128_MASK,
29279 IX86_BUILTIN_STOREDQUSI256_MASK,
29280 IX86_BUILTIN_STOREDQUSI128_MASK,
29281 IX86_BUILTIN_STOREDQUHI256_MASK,
29282 IX86_BUILTIN_STOREDQUHI128_MASK,
29283 IX86_BUILTIN_STOREDQUQI256_MASK,
29284 IX86_BUILTIN_STOREDQUQI128_MASK,
29285 IX86_BUILTIN_COMPRESSPDSTORE256,
29286 IX86_BUILTIN_COMPRESSPDSTORE128,
29287 IX86_BUILTIN_COMPRESSPSSTORE256,
29288 IX86_BUILTIN_COMPRESSPSSTORE128,
29289 IX86_BUILTIN_PCOMPRESSQSTORE256,
29290 IX86_BUILTIN_PCOMPRESSQSTORE128,
29291 IX86_BUILTIN_PCOMPRESSDSTORE256,
29292 IX86_BUILTIN_PCOMPRESSDSTORE128,
29293 IX86_BUILTIN_EXPANDPDLOAD256,
29294 IX86_BUILTIN_EXPANDPDLOAD128,
29295 IX86_BUILTIN_EXPANDPSLOAD256,
29296 IX86_BUILTIN_EXPANDPSLOAD128,
29297 IX86_BUILTIN_PEXPANDQLOAD256,
29298 IX86_BUILTIN_PEXPANDQLOAD128,
29299 IX86_BUILTIN_PEXPANDDLOAD256,
29300 IX86_BUILTIN_PEXPANDDLOAD128,
29301 IX86_BUILTIN_EXPANDPDLOAD256Z,
29302 IX86_BUILTIN_EXPANDPDLOAD128Z,
29303 IX86_BUILTIN_EXPANDPSLOAD256Z,
29304 IX86_BUILTIN_EXPANDPSLOAD128Z,
29305 IX86_BUILTIN_PEXPANDQLOAD256Z,
29306 IX86_BUILTIN_PEXPANDQLOAD128Z,
29307 IX86_BUILTIN_PEXPANDDLOAD256Z,
29308 IX86_BUILTIN_PEXPANDDLOAD128Z,
29309 IX86_BUILTIN_PALIGNR256_MASK,
29310 IX86_BUILTIN_PALIGNR128_MASK,
29311 IX86_BUILTIN_MOVDQA64_256_MASK,
29312 IX86_BUILTIN_MOVDQA64_128_MASK,
29313 IX86_BUILTIN_MOVDQA32_256_MASK,
29314 IX86_BUILTIN_MOVDQA32_128_MASK,
29315 IX86_BUILTIN_MOVAPD256_MASK,
29316 IX86_BUILTIN_MOVAPD128_MASK,
29317 IX86_BUILTIN_MOVAPS256_MASK,
29318 IX86_BUILTIN_MOVAPS128_MASK,
29319 IX86_BUILTIN_MOVDQUHI256_MASK,
29320 IX86_BUILTIN_MOVDQUHI128_MASK,
29321 IX86_BUILTIN_MOVDQUQI256_MASK,
29322 IX86_BUILTIN_MOVDQUQI128_MASK,
29323 IX86_BUILTIN_MINPS128_MASK,
29324 IX86_BUILTIN_MAXPS128_MASK,
29325 IX86_BUILTIN_MINPD128_MASK,
29326 IX86_BUILTIN_MAXPD128_MASK,
29327 IX86_BUILTIN_MAXPD256_MASK,
29328 IX86_BUILTIN_MAXPS256_MASK,
29329 IX86_BUILTIN_MINPD256_MASK,
29330 IX86_BUILTIN_MINPS256_MASK,
29331 IX86_BUILTIN_MULPS128_MASK,
29332 IX86_BUILTIN_DIVPS128_MASK,
29333 IX86_BUILTIN_MULPD128_MASK,
29334 IX86_BUILTIN_DIVPD128_MASK,
29335 IX86_BUILTIN_DIVPD256_MASK,
29336 IX86_BUILTIN_DIVPS256_MASK,
29337 IX86_BUILTIN_MULPD256_MASK,
29338 IX86_BUILTIN_MULPS256_MASK,
29339 IX86_BUILTIN_ADDPD128_MASK,
29340 IX86_BUILTIN_ADDPD256_MASK,
29341 IX86_BUILTIN_ADDPS128_MASK,
29342 IX86_BUILTIN_ADDPS256_MASK,
29343 IX86_BUILTIN_SUBPD128_MASK,
29344 IX86_BUILTIN_SUBPD256_MASK,
29345 IX86_BUILTIN_SUBPS128_MASK,
29346 IX86_BUILTIN_SUBPS256_MASK,
29347 IX86_BUILTIN_XORPD256_MASK,
29348 IX86_BUILTIN_XORPD128_MASK,
29349 IX86_BUILTIN_XORPS256_MASK,
29350 IX86_BUILTIN_XORPS128_MASK,
29351 IX86_BUILTIN_ORPD256_MASK,
29352 IX86_BUILTIN_ORPD128_MASK,
29353 IX86_BUILTIN_ORPS256_MASK,
29354 IX86_BUILTIN_ORPS128_MASK,
29355 IX86_BUILTIN_BROADCASTF32x2_256,
29356 IX86_BUILTIN_BROADCASTI32x2_256,
29357 IX86_BUILTIN_BROADCASTI32x2_128,
29358 IX86_BUILTIN_BROADCASTF64X2_256,
29359 IX86_BUILTIN_BROADCASTI64X2_256,
29360 IX86_BUILTIN_BROADCASTF32X4_256,
29361 IX86_BUILTIN_BROADCASTI32X4_256,
29362 IX86_BUILTIN_EXTRACTF32X4_256,
29363 IX86_BUILTIN_EXTRACTI32X4_256,
29364 IX86_BUILTIN_DBPSADBW256,
29365 IX86_BUILTIN_DBPSADBW128,
29366 IX86_BUILTIN_CVTTPD2QQ256,
29367 IX86_BUILTIN_CVTTPD2QQ128,
29368 IX86_BUILTIN_CVTTPD2UQQ256,
29369 IX86_BUILTIN_CVTTPD2UQQ128,
29370 IX86_BUILTIN_CVTPD2QQ256,
29371 IX86_BUILTIN_CVTPD2QQ128,
29372 IX86_BUILTIN_CVTPD2UQQ256,
29373 IX86_BUILTIN_CVTPD2UQQ128,
29374 IX86_BUILTIN_CVTPD2UDQ256_MASK,
29375 IX86_BUILTIN_CVTPD2UDQ128_MASK,
29376 IX86_BUILTIN_CVTTPS2QQ256,
29377 IX86_BUILTIN_CVTTPS2QQ128,
29378 IX86_BUILTIN_CVTTPS2UQQ256,
29379 IX86_BUILTIN_CVTTPS2UQQ128,
29380 IX86_BUILTIN_CVTTPS2DQ256_MASK,
29381 IX86_BUILTIN_CVTTPS2DQ128_MASK,
29382 IX86_BUILTIN_CVTTPS2UDQ256,
29383 IX86_BUILTIN_CVTTPS2UDQ128,
29384 IX86_BUILTIN_CVTTPD2DQ256_MASK,
29385 IX86_BUILTIN_CVTTPD2DQ128_MASK,
29386 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
29387 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
29388 IX86_BUILTIN_CVTPD2DQ256_MASK,
29389 IX86_BUILTIN_CVTPD2DQ128_MASK,
29390 IX86_BUILTIN_CVTDQ2PD256_MASK,
29391 IX86_BUILTIN_CVTDQ2PD128_MASK,
29392 IX86_BUILTIN_CVTUDQ2PD256_MASK,
29393 IX86_BUILTIN_CVTUDQ2PD128_MASK,
29394 IX86_BUILTIN_CVTDQ2PS256_MASK,
29395 IX86_BUILTIN_CVTDQ2PS128_MASK,
29396 IX86_BUILTIN_CVTUDQ2PS256_MASK,
29397 IX86_BUILTIN_CVTUDQ2PS128_MASK,
29398 IX86_BUILTIN_CVTPS2PD256_MASK,
29399 IX86_BUILTIN_CVTPS2PD128_MASK,
29400 IX86_BUILTIN_PBROADCASTB256_MASK,
29401 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
29402 IX86_BUILTIN_PBROADCASTB128_MASK,
29403 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
29404 IX86_BUILTIN_PBROADCASTW256_MASK,
29405 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
29406 IX86_BUILTIN_PBROADCASTW128_MASK,
29407 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
29408 IX86_BUILTIN_PBROADCASTD256_MASK,
29409 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
29410 IX86_BUILTIN_PBROADCASTD128_MASK,
29411 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
29412 IX86_BUILTIN_PBROADCASTQ256_MASK,
29413 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
29414 IX86_BUILTIN_PBROADCASTQ128_MASK,
29415 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
29416 IX86_BUILTIN_BROADCASTSS256,
29417 IX86_BUILTIN_BROADCASTSS128,
29418 IX86_BUILTIN_BROADCASTSD256,
29419 IX86_BUILTIN_EXTRACTF64X2_256,
29420 IX86_BUILTIN_EXTRACTI64X2_256,
29421 IX86_BUILTIN_INSERTF32X4_256,
29422 IX86_BUILTIN_INSERTI32X4_256,
29423 IX86_BUILTIN_PMOVSXBW256_MASK,
29424 IX86_BUILTIN_PMOVSXBW128_MASK,
29425 IX86_BUILTIN_PMOVSXBD256_MASK,
29426 IX86_BUILTIN_PMOVSXBD128_MASK,
29427 IX86_BUILTIN_PMOVSXBQ256_MASK,
29428 IX86_BUILTIN_PMOVSXBQ128_MASK,
29429 IX86_BUILTIN_PMOVSXWD256_MASK,
29430 IX86_BUILTIN_PMOVSXWD128_MASK,
29431 IX86_BUILTIN_PMOVSXWQ256_MASK,
29432 IX86_BUILTIN_PMOVSXWQ128_MASK,
29433 IX86_BUILTIN_PMOVSXDQ256_MASK,
29434 IX86_BUILTIN_PMOVSXDQ128_MASK,
29435 IX86_BUILTIN_PMOVZXBW256_MASK,
29436 IX86_BUILTIN_PMOVZXBW128_MASK,
29437 IX86_BUILTIN_PMOVZXBD256_MASK,
29438 IX86_BUILTIN_PMOVZXBD128_MASK,
29439 IX86_BUILTIN_PMOVZXBQ256_MASK,
29440 IX86_BUILTIN_PMOVZXBQ128_MASK,
29441 IX86_BUILTIN_PMOVZXWD256_MASK,
29442 IX86_BUILTIN_PMOVZXWD128_MASK,
29443 IX86_BUILTIN_PMOVZXWQ256_MASK,
29444 IX86_BUILTIN_PMOVZXWQ128_MASK,
29445 IX86_BUILTIN_PMOVZXDQ256_MASK,
29446 IX86_BUILTIN_PMOVZXDQ128_MASK,
29447 IX86_BUILTIN_REDUCEPD256_MASK,
29448 IX86_BUILTIN_REDUCEPD128_MASK,
29449 IX86_BUILTIN_REDUCEPS256_MASK,
29450 IX86_BUILTIN_REDUCEPS128_MASK,
29451 IX86_BUILTIN_REDUCESD_MASK,
29452 IX86_BUILTIN_REDUCESS_MASK,
29453 IX86_BUILTIN_VPERMVARHI256_MASK,
29454 IX86_BUILTIN_VPERMVARHI128_MASK,
29455 IX86_BUILTIN_VPERMT2VARHI256,
29456 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
29457 IX86_BUILTIN_VPERMT2VARHI128,
29458 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
29459 IX86_BUILTIN_VPERMI2VARHI256,
29460 IX86_BUILTIN_VPERMI2VARHI128,
29461 IX86_BUILTIN_RCP14PD256,
29462 IX86_BUILTIN_RCP14PD128,
29463 IX86_BUILTIN_RCP14PS256,
29464 IX86_BUILTIN_RCP14PS128,
29465 IX86_BUILTIN_RSQRT14PD256_MASK,
29466 IX86_BUILTIN_RSQRT14PD128_MASK,
29467 IX86_BUILTIN_RSQRT14PS256_MASK,
29468 IX86_BUILTIN_RSQRT14PS128_MASK,
29469 IX86_BUILTIN_SQRTPD256_MASK,
29470 IX86_BUILTIN_SQRTPD128_MASK,
29471 IX86_BUILTIN_SQRTPS256_MASK,
29472 IX86_BUILTIN_SQRTPS128_MASK,
29473 IX86_BUILTIN_PADDB128_MASK,
29474 IX86_BUILTIN_PADDW128_MASK,
29475 IX86_BUILTIN_PADDD128_MASK,
29476 IX86_BUILTIN_PADDQ128_MASK,
29477 IX86_BUILTIN_PSUBB128_MASK,
29478 IX86_BUILTIN_PSUBW128_MASK,
29479 IX86_BUILTIN_PSUBD128_MASK,
29480 IX86_BUILTIN_PSUBQ128_MASK,
29481 IX86_BUILTIN_PADDSB128_MASK,
29482 IX86_BUILTIN_PADDSW128_MASK,
29483 IX86_BUILTIN_PSUBSB128_MASK,
29484 IX86_BUILTIN_PSUBSW128_MASK,
29485 IX86_BUILTIN_PADDUSB128_MASK,
29486 IX86_BUILTIN_PADDUSW128_MASK,
29487 IX86_BUILTIN_PSUBUSB128_MASK,
29488 IX86_BUILTIN_PSUBUSW128_MASK,
29489 IX86_BUILTIN_PADDB256_MASK,
29490 IX86_BUILTIN_PADDW256_MASK,
29491 IX86_BUILTIN_PADDD256_MASK,
29492 IX86_BUILTIN_PADDQ256_MASK,
29493 IX86_BUILTIN_PADDSB256_MASK,
29494 IX86_BUILTIN_PADDSW256_MASK,
29495 IX86_BUILTIN_PADDUSB256_MASK,
29496 IX86_BUILTIN_PADDUSW256_MASK,
29497 IX86_BUILTIN_PSUBB256_MASK,
29498 IX86_BUILTIN_PSUBW256_MASK,
29499 IX86_BUILTIN_PSUBD256_MASK,
29500 IX86_BUILTIN_PSUBQ256_MASK,
29501 IX86_BUILTIN_PSUBSB256_MASK,
29502 IX86_BUILTIN_PSUBSW256_MASK,
29503 IX86_BUILTIN_PSUBUSB256_MASK,
29504 IX86_BUILTIN_PSUBUSW256_MASK,
29505 IX86_BUILTIN_SHUF_F64x2_256,
29506 IX86_BUILTIN_SHUF_I64x2_256,
29507 IX86_BUILTIN_SHUF_I32x4_256,
29508 IX86_BUILTIN_SHUF_F32x4_256,
29509 IX86_BUILTIN_PMOVWB128,
29510 IX86_BUILTIN_PMOVWB256,
29511 IX86_BUILTIN_PMOVSWB128,
29512 IX86_BUILTIN_PMOVSWB256,
29513 IX86_BUILTIN_PMOVUSWB128,
29514 IX86_BUILTIN_PMOVUSWB256,
29515 IX86_BUILTIN_PMOVDB128,
29516 IX86_BUILTIN_PMOVDB256,
29517 IX86_BUILTIN_PMOVSDB128,
29518 IX86_BUILTIN_PMOVSDB256,
29519 IX86_BUILTIN_PMOVUSDB128,
29520 IX86_BUILTIN_PMOVUSDB256,
29521 IX86_BUILTIN_PMOVDW128,
29522 IX86_BUILTIN_PMOVDW256,
29523 IX86_BUILTIN_PMOVSDW128,
29524 IX86_BUILTIN_PMOVSDW256,
29525 IX86_BUILTIN_PMOVUSDW128,
29526 IX86_BUILTIN_PMOVUSDW256,
29527 IX86_BUILTIN_PMOVQB128,
29528 IX86_BUILTIN_PMOVQB256,
29529 IX86_BUILTIN_PMOVSQB128,
29530 IX86_BUILTIN_PMOVSQB256,
29531 IX86_BUILTIN_PMOVUSQB128,
29532 IX86_BUILTIN_PMOVUSQB256,
29533 IX86_BUILTIN_PMOVQW128,
29534 IX86_BUILTIN_PMOVQW256,
29535 IX86_BUILTIN_PMOVSQW128,
29536 IX86_BUILTIN_PMOVSQW256,
29537 IX86_BUILTIN_PMOVUSQW128,
29538 IX86_BUILTIN_PMOVUSQW256,
29539 IX86_BUILTIN_PMOVQD128,
29540 IX86_BUILTIN_PMOVQD256,
29541 IX86_BUILTIN_PMOVSQD128,
29542 IX86_BUILTIN_PMOVSQD256,
29543 IX86_BUILTIN_PMOVUSQD128,
29544 IX86_BUILTIN_PMOVUSQD256,
29545 IX86_BUILTIN_RANGEPD256,
29546 IX86_BUILTIN_RANGEPD128,
29547 IX86_BUILTIN_RANGEPS256,
29548 IX86_BUILTIN_RANGEPS128,
29549 IX86_BUILTIN_GETEXPPS256,
29550 IX86_BUILTIN_GETEXPPD256,
29551 IX86_BUILTIN_GETEXPPS128,
29552 IX86_BUILTIN_GETEXPPD128,
29553 IX86_BUILTIN_FIXUPIMMPD256_MASK,
29554 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
29555 IX86_BUILTIN_FIXUPIMMPS256_MASK,
29556 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
29557 IX86_BUILTIN_FIXUPIMMPD128_MASK,
29558 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
29559 IX86_BUILTIN_FIXUPIMMPS128_MASK,
29560 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
29561 IX86_BUILTIN_PABSQ256,
29562 IX86_BUILTIN_PABSQ128,
29563 IX86_BUILTIN_PABSD256_MASK,
29564 IX86_BUILTIN_PABSD128_MASK,
29565 IX86_BUILTIN_PMULHRSW256_MASK,
29566 IX86_BUILTIN_PMULHRSW128_MASK,
29567 IX86_BUILTIN_PMULHUW128_MASK,
29568 IX86_BUILTIN_PMULHUW256_MASK,
29569 IX86_BUILTIN_PMULHW256_MASK,
29570 IX86_BUILTIN_PMULHW128_MASK,
29571 IX86_BUILTIN_PMULLW256_MASK,
29572 IX86_BUILTIN_PMULLW128_MASK,
29573 IX86_BUILTIN_PMULLQ256,
29574 IX86_BUILTIN_PMULLQ128,
29575 IX86_BUILTIN_ANDPD256_MASK,
29576 IX86_BUILTIN_ANDPD128_MASK,
29577 IX86_BUILTIN_ANDPS256_MASK,
29578 IX86_BUILTIN_ANDPS128_MASK,
29579 IX86_BUILTIN_ANDNPD256_MASK,
29580 IX86_BUILTIN_ANDNPD128_MASK,
29581 IX86_BUILTIN_ANDNPS256_MASK,
29582 IX86_BUILTIN_ANDNPS128_MASK,
29583 IX86_BUILTIN_PSLLWI128_MASK,
29584 IX86_BUILTIN_PSLLDI128_MASK,
29585 IX86_BUILTIN_PSLLQI128_MASK,
29586 IX86_BUILTIN_PSLLW128_MASK,
29587 IX86_BUILTIN_PSLLD128_MASK,
29588 IX86_BUILTIN_PSLLQ128_MASK,
29589 IX86_BUILTIN_PSLLWI256_MASK ,
29590 IX86_BUILTIN_PSLLW256_MASK,
29591 IX86_BUILTIN_PSLLDI256_MASK,
29592 IX86_BUILTIN_PSLLD256_MASK,
29593 IX86_BUILTIN_PSLLQI256_MASK,
29594 IX86_BUILTIN_PSLLQ256_MASK,
29595 IX86_BUILTIN_PSRADI128_MASK,
29596 IX86_BUILTIN_PSRAD128_MASK,
29597 IX86_BUILTIN_PSRADI256_MASK,
29598 IX86_BUILTIN_PSRAD256_MASK,
29599 IX86_BUILTIN_PSRAQI128_MASK,
29600 IX86_BUILTIN_PSRAQ128_MASK,
29601 IX86_BUILTIN_PSRAQI256_MASK,
29602 IX86_BUILTIN_PSRAQ256_MASK,
29603 IX86_BUILTIN_PANDD256,
29604 IX86_BUILTIN_PANDD128,
29605 IX86_BUILTIN_PSRLDI128_MASK,
29606 IX86_BUILTIN_PSRLD128_MASK,
29607 IX86_BUILTIN_PSRLDI256_MASK,
29608 IX86_BUILTIN_PSRLD256_MASK,
29609 IX86_BUILTIN_PSRLQI128_MASK,
29610 IX86_BUILTIN_PSRLQ128_MASK,
29611 IX86_BUILTIN_PSRLQI256_MASK,
29612 IX86_BUILTIN_PSRLQ256_MASK,
29613 IX86_BUILTIN_PANDQ256,
29614 IX86_BUILTIN_PANDQ128,
29615 IX86_BUILTIN_PANDND256,
29616 IX86_BUILTIN_PANDND128,
29617 IX86_BUILTIN_PANDNQ256,
29618 IX86_BUILTIN_PANDNQ128,
29619 IX86_BUILTIN_PORD256,
29620 IX86_BUILTIN_PORD128,
29621 IX86_BUILTIN_PORQ256,
29622 IX86_BUILTIN_PORQ128,
29623 IX86_BUILTIN_PXORD256,
29624 IX86_BUILTIN_PXORD128,
29625 IX86_BUILTIN_PXORQ256,
29626 IX86_BUILTIN_PXORQ128,
29627 IX86_BUILTIN_PACKSSWB256_MASK,
29628 IX86_BUILTIN_PACKSSWB128_MASK,
29629 IX86_BUILTIN_PACKUSWB256_MASK,
29630 IX86_BUILTIN_PACKUSWB128_MASK,
29631 IX86_BUILTIN_RNDSCALEPS256,
29632 IX86_BUILTIN_RNDSCALEPD256,
29633 IX86_BUILTIN_RNDSCALEPS128,
29634 IX86_BUILTIN_RNDSCALEPD128,
29635 IX86_BUILTIN_VTERNLOGQ256_MASK,
29636 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
29637 IX86_BUILTIN_VTERNLOGD256_MASK,
29638 IX86_BUILTIN_VTERNLOGD256_MASKZ,
29639 IX86_BUILTIN_VTERNLOGQ128_MASK,
29640 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
29641 IX86_BUILTIN_VTERNLOGD128_MASK,
29642 IX86_BUILTIN_VTERNLOGD128_MASKZ,
29643 IX86_BUILTIN_SCALEFPD256,
29644 IX86_BUILTIN_SCALEFPS256,
29645 IX86_BUILTIN_SCALEFPD128,
29646 IX86_BUILTIN_SCALEFPS128,
29647 IX86_BUILTIN_VFMADDPD256_MASK,
29648 IX86_BUILTIN_VFMADDPD256_MASK3,
29649 IX86_BUILTIN_VFMADDPD256_MASKZ,
29650 IX86_BUILTIN_VFMADDPD128_MASK,
29651 IX86_BUILTIN_VFMADDPD128_MASK3,
29652 IX86_BUILTIN_VFMADDPD128_MASKZ,
29653 IX86_BUILTIN_VFMADDPS256_MASK,
29654 IX86_BUILTIN_VFMADDPS256_MASK3,
29655 IX86_BUILTIN_VFMADDPS256_MASKZ,
29656 IX86_BUILTIN_VFMADDPS128_MASK,
29657 IX86_BUILTIN_VFMADDPS128_MASK3,
29658 IX86_BUILTIN_VFMADDPS128_MASKZ,
29659 IX86_BUILTIN_VFMSUBPD256_MASK3,
29660 IX86_BUILTIN_VFMSUBPD128_MASK3,
29661 IX86_BUILTIN_VFMSUBPS256_MASK3,
29662 IX86_BUILTIN_VFMSUBPS128_MASK3,
29663 IX86_BUILTIN_VFNMADDPD256_MASK,
29664 IX86_BUILTIN_VFNMADDPD128_MASK,
29665 IX86_BUILTIN_VFNMADDPS256_MASK,
29666 IX86_BUILTIN_VFNMADDPS128_MASK,
29667 IX86_BUILTIN_VFNMSUBPD256_MASK,
29668 IX86_BUILTIN_VFNMSUBPD256_MASK3,
29669 IX86_BUILTIN_VFNMSUBPD128_MASK,
29670 IX86_BUILTIN_VFNMSUBPD128_MASK3,
29671 IX86_BUILTIN_VFNMSUBPS256_MASK,
29672 IX86_BUILTIN_VFNMSUBPS256_MASK3,
29673 IX86_BUILTIN_VFNMSUBPS128_MASK,
29674 IX86_BUILTIN_VFNMSUBPS128_MASK3,
29675 IX86_BUILTIN_VFMADDSUBPD256_MASK,
29676 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
29677 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
29678 IX86_BUILTIN_VFMADDSUBPD128_MASK,
29679 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
29680 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
29681 IX86_BUILTIN_VFMADDSUBPS256_MASK,
29682 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
29683 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
29684 IX86_BUILTIN_VFMADDSUBPS128_MASK,
29685 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
29686 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
29687 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
29688 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
29689 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
29690 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
29691 IX86_BUILTIN_INSERTF64X2_256,
29692 IX86_BUILTIN_INSERTI64X2_256,
29693 IX86_BUILTIN_PSRAVV16HI,
29694 IX86_BUILTIN_PSRAVV8HI,
29695 IX86_BUILTIN_PMADDUBSW256_MASK,
29696 IX86_BUILTIN_PMADDUBSW128_MASK,
29697 IX86_BUILTIN_PMADDWD256_MASK,
29698 IX86_BUILTIN_PMADDWD128_MASK,
29699 IX86_BUILTIN_PSRLVV16HI,
29700 IX86_BUILTIN_PSRLVV8HI,
29701 IX86_BUILTIN_CVTPS2DQ256_MASK,
29702 IX86_BUILTIN_CVTPS2DQ128_MASK,
29703 IX86_BUILTIN_CVTPS2UDQ256,
29704 IX86_BUILTIN_CVTPS2UDQ128,
29705 IX86_BUILTIN_CVTPS2QQ256,
29706 IX86_BUILTIN_CVTPS2QQ128,
29707 IX86_BUILTIN_CVTPS2UQQ256,
29708 IX86_BUILTIN_CVTPS2UQQ128,
29709 IX86_BUILTIN_GETMANTPS256,
29710 IX86_BUILTIN_GETMANTPS128,
29711 IX86_BUILTIN_GETMANTPD256,
29712 IX86_BUILTIN_GETMANTPD128,
29713 IX86_BUILTIN_MOVDDUP256_MASK,
29714 IX86_BUILTIN_MOVDDUP128_MASK,
29715 IX86_BUILTIN_MOVSHDUP256_MASK,
29716 IX86_BUILTIN_MOVSHDUP128_MASK,
29717 IX86_BUILTIN_MOVSLDUP256_MASK,
29718 IX86_BUILTIN_MOVSLDUP128_MASK,
29719 IX86_BUILTIN_CVTQQ2PS256,
29720 IX86_BUILTIN_CVTQQ2PS128,
29721 IX86_BUILTIN_CVTUQQ2PS256,
29722 IX86_BUILTIN_CVTUQQ2PS128,
29723 IX86_BUILTIN_CVTQQ2PD256,
29724 IX86_BUILTIN_CVTQQ2PD128,
29725 IX86_BUILTIN_CVTUQQ2PD256,
29726 IX86_BUILTIN_CVTUQQ2PD128,
29727 IX86_BUILTIN_VPERMT2VARQ256,
29728 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
29729 IX86_BUILTIN_VPERMT2VARD256,
29730 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
29731 IX86_BUILTIN_VPERMI2VARQ256,
29732 IX86_BUILTIN_VPERMI2VARD256,
29733 IX86_BUILTIN_VPERMT2VARPD256,
29734 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
29735 IX86_BUILTIN_VPERMT2VARPS256,
29736 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
29737 IX86_BUILTIN_VPERMI2VARPD256,
29738 IX86_BUILTIN_VPERMI2VARPS256,
29739 IX86_BUILTIN_VPERMT2VARQ128,
29740 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
29741 IX86_BUILTIN_VPERMT2VARD128,
29742 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
29743 IX86_BUILTIN_VPERMI2VARQ128,
29744 IX86_BUILTIN_VPERMI2VARD128,
29745 IX86_BUILTIN_VPERMT2VARPD128,
29746 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
29747 IX86_BUILTIN_VPERMT2VARPS128,
29748 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
29749 IX86_BUILTIN_VPERMI2VARPD128,
29750 IX86_BUILTIN_VPERMI2VARPS128,
29751 IX86_BUILTIN_PSHUFB256_MASK,
29752 IX86_BUILTIN_PSHUFB128_MASK,
29753 IX86_BUILTIN_PSHUFHW256_MASK,
29754 IX86_BUILTIN_PSHUFHW128_MASK,
29755 IX86_BUILTIN_PSHUFLW256_MASK,
29756 IX86_BUILTIN_PSHUFLW128_MASK,
29757 IX86_BUILTIN_PSHUFD256_MASK,
29758 IX86_BUILTIN_PSHUFD128_MASK,
29759 IX86_BUILTIN_SHUFPD256_MASK,
29760 IX86_BUILTIN_SHUFPD128_MASK,
29761 IX86_BUILTIN_SHUFPS256_MASK,
29762 IX86_BUILTIN_SHUFPS128_MASK,
29763 IX86_BUILTIN_PROLVQ256,
29764 IX86_BUILTIN_PROLVQ128,
29765 IX86_BUILTIN_PROLQ256,
29766 IX86_BUILTIN_PROLQ128,
29767 IX86_BUILTIN_PRORVQ256,
29768 IX86_BUILTIN_PRORVQ128,
29769 IX86_BUILTIN_PRORQ256,
29770 IX86_BUILTIN_PRORQ128,
29771 IX86_BUILTIN_PSRAVQ128,
29772 IX86_BUILTIN_PSRAVQ256,
29773 IX86_BUILTIN_PSLLVV4DI_MASK,
29774 IX86_BUILTIN_PSLLVV2DI_MASK,
29775 IX86_BUILTIN_PSLLVV8SI_MASK,
29776 IX86_BUILTIN_PSLLVV4SI_MASK,
29777 IX86_BUILTIN_PSRAVV8SI_MASK,
29778 IX86_BUILTIN_PSRAVV4SI_MASK,
29779 IX86_BUILTIN_PSRLVV4DI_MASK,
29780 IX86_BUILTIN_PSRLVV2DI_MASK,
29781 IX86_BUILTIN_PSRLVV8SI_MASK,
29782 IX86_BUILTIN_PSRLVV4SI_MASK,
29783 IX86_BUILTIN_PSRAWI256_MASK,
29784 IX86_BUILTIN_PSRAW256_MASK,
29785 IX86_BUILTIN_PSRAWI128_MASK,
29786 IX86_BUILTIN_PSRAW128_MASK,
29787 IX86_BUILTIN_PSRLWI256_MASK,
29788 IX86_BUILTIN_PSRLW256_MASK,
29789 IX86_BUILTIN_PSRLWI128_MASK,
29790 IX86_BUILTIN_PSRLW128_MASK,
29791 IX86_BUILTIN_PRORVD256,
29792 IX86_BUILTIN_PROLVD256,
29793 IX86_BUILTIN_PRORD256,
29794 IX86_BUILTIN_PROLD256,
29795 IX86_BUILTIN_PRORVD128,
29796 IX86_BUILTIN_PROLVD128,
29797 IX86_BUILTIN_PRORD128,
29798 IX86_BUILTIN_PROLD128,
29799 IX86_BUILTIN_FPCLASSPD256,
29800 IX86_BUILTIN_FPCLASSPD128,
29801 IX86_BUILTIN_FPCLASSSD,
29802 IX86_BUILTIN_FPCLASSPS256,
29803 IX86_BUILTIN_FPCLASSPS128,
29804 IX86_BUILTIN_FPCLASSSS,
29805 IX86_BUILTIN_CVTB2MASK128,
29806 IX86_BUILTIN_CVTB2MASK256,
29807 IX86_BUILTIN_CVTW2MASK128,
29808 IX86_BUILTIN_CVTW2MASK256,
29809 IX86_BUILTIN_CVTD2MASK128,
29810 IX86_BUILTIN_CVTD2MASK256,
29811 IX86_BUILTIN_CVTQ2MASK128,
29812 IX86_BUILTIN_CVTQ2MASK256,
29813 IX86_BUILTIN_CVTMASK2B128,
29814 IX86_BUILTIN_CVTMASK2B256,
29815 IX86_BUILTIN_CVTMASK2W128,
29816 IX86_BUILTIN_CVTMASK2W256,
29817 IX86_BUILTIN_CVTMASK2D128,
29818 IX86_BUILTIN_CVTMASK2D256,
29819 IX86_BUILTIN_CVTMASK2Q128,
29820 IX86_BUILTIN_CVTMASK2Q256,
29821 IX86_BUILTIN_PCMPEQB128_MASK,
29822 IX86_BUILTIN_PCMPEQB256_MASK,
29823 IX86_BUILTIN_PCMPEQW128_MASK,
29824 IX86_BUILTIN_PCMPEQW256_MASK,
29825 IX86_BUILTIN_PCMPEQD128_MASK,
29826 IX86_BUILTIN_PCMPEQD256_MASK,
29827 IX86_BUILTIN_PCMPEQQ128_MASK,
29828 IX86_BUILTIN_PCMPEQQ256_MASK,
29829 IX86_BUILTIN_PCMPGTB128_MASK,
29830 IX86_BUILTIN_PCMPGTB256_MASK,
29831 IX86_BUILTIN_PCMPGTW128_MASK,
29832 IX86_BUILTIN_PCMPGTW256_MASK,
29833 IX86_BUILTIN_PCMPGTD128_MASK,
29834 IX86_BUILTIN_PCMPGTD256_MASK,
29835 IX86_BUILTIN_PCMPGTQ128_MASK,
29836 IX86_BUILTIN_PCMPGTQ256_MASK,
29837 IX86_BUILTIN_PTESTMB128,
29838 IX86_BUILTIN_PTESTMB256,
29839 IX86_BUILTIN_PTESTMW128,
29840 IX86_BUILTIN_PTESTMW256,
29841 IX86_BUILTIN_PTESTMD128,
29842 IX86_BUILTIN_PTESTMD256,
29843 IX86_BUILTIN_PTESTMQ128,
29844 IX86_BUILTIN_PTESTMQ256,
29845 IX86_BUILTIN_PTESTNMB128,
29846 IX86_BUILTIN_PTESTNMB256,
29847 IX86_BUILTIN_PTESTNMW128,
29848 IX86_BUILTIN_PTESTNMW256,
29849 IX86_BUILTIN_PTESTNMD128,
29850 IX86_BUILTIN_PTESTNMD256,
29851 IX86_BUILTIN_PTESTNMQ128,
29852 IX86_BUILTIN_PTESTNMQ256,
29853 IX86_BUILTIN_PBROADCASTMB128,
29854 IX86_BUILTIN_PBROADCASTMB256,
29855 IX86_BUILTIN_PBROADCASTMW128,
29856 IX86_BUILTIN_PBROADCASTMW256,
29857 IX86_BUILTIN_COMPRESSPD256,
29858 IX86_BUILTIN_COMPRESSPD128,
29859 IX86_BUILTIN_COMPRESSPS256,
29860 IX86_BUILTIN_COMPRESSPS128,
29861 IX86_BUILTIN_PCOMPRESSQ256,
29862 IX86_BUILTIN_PCOMPRESSQ128,
29863 IX86_BUILTIN_PCOMPRESSD256,
29864 IX86_BUILTIN_PCOMPRESSD128,
29865 IX86_BUILTIN_EXPANDPD256,
29866 IX86_BUILTIN_EXPANDPD128,
29867 IX86_BUILTIN_EXPANDPS256,
29868 IX86_BUILTIN_EXPANDPS128,
29869 IX86_BUILTIN_PEXPANDQ256,
29870 IX86_BUILTIN_PEXPANDQ128,
29871 IX86_BUILTIN_PEXPANDD256,
29872 IX86_BUILTIN_PEXPANDD128,
29873 IX86_BUILTIN_EXPANDPD256Z,
29874 IX86_BUILTIN_EXPANDPD128Z,
29875 IX86_BUILTIN_EXPANDPS256Z,
29876 IX86_BUILTIN_EXPANDPS128Z,
29877 IX86_BUILTIN_PEXPANDQ256Z,
29878 IX86_BUILTIN_PEXPANDQ128Z,
29879 IX86_BUILTIN_PEXPANDD256Z,
29880 IX86_BUILTIN_PEXPANDD128Z,
29881 IX86_BUILTIN_PMAXSD256_MASK,
29882 IX86_BUILTIN_PMINSD256_MASK,
29883 IX86_BUILTIN_PMAXUD256_MASK,
29884 IX86_BUILTIN_PMINUD256_MASK,
29885 IX86_BUILTIN_PMAXSD128_MASK,
29886 IX86_BUILTIN_PMINSD128_MASK,
29887 IX86_BUILTIN_PMAXUD128_MASK,
29888 IX86_BUILTIN_PMINUD128_MASK,
29889 IX86_BUILTIN_PMAXSQ256_MASK,
29890 IX86_BUILTIN_PMINSQ256_MASK,
29891 IX86_BUILTIN_PMAXUQ256_MASK,
29892 IX86_BUILTIN_PMINUQ256_MASK,
29893 IX86_BUILTIN_PMAXSQ128_MASK,
29894 IX86_BUILTIN_PMINSQ128_MASK,
29895 IX86_BUILTIN_PMAXUQ128_MASK,
29896 IX86_BUILTIN_PMINUQ128_MASK,
29897 IX86_BUILTIN_PMINSB256_MASK,
29898 IX86_BUILTIN_PMINUB256_MASK,
29899 IX86_BUILTIN_PMAXSB256_MASK,
29900 IX86_BUILTIN_PMAXUB256_MASK,
29901 IX86_BUILTIN_PMINSB128_MASK,
29902 IX86_BUILTIN_PMINUB128_MASK,
29903 IX86_BUILTIN_PMAXSB128_MASK,
29904 IX86_BUILTIN_PMAXUB128_MASK,
29905 IX86_BUILTIN_PMINSW256_MASK,
29906 IX86_BUILTIN_PMINUW256_MASK,
29907 IX86_BUILTIN_PMAXSW256_MASK,
29908 IX86_BUILTIN_PMAXUW256_MASK,
29909 IX86_BUILTIN_PMINSW128_MASK,
29910 IX86_BUILTIN_PMINUW128_MASK,
29911 IX86_BUILTIN_PMAXSW128_MASK,
29912 IX86_BUILTIN_PMAXUW128_MASK,
29913 IX86_BUILTIN_VPCONFLICTQ256,
29914 IX86_BUILTIN_VPCONFLICTD256,
29915 IX86_BUILTIN_VPCLZCNTQ256,
29916 IX86_BUILTIN_VPCLZCNTD256,
29917 IX86_BUILTIN_UNPCKHPD256_MASK,
29918 IX86_BUILTIN_UNPCKHPD128_MASK,
29919 IX86_BUILTIN_UNPCKHPS256_MASK,
29920 IX86_BUILTIN_UNPCKHPS128_MASK,
29921 IX86_BUILTIN_UNPCKLPD256_MASK,
29922 IX86_BUILTIN_UNPCKLPD128_MASK,
29923 IX86_BUILTIN_UNPCKLPS256_MASK,
29924 IX86_BUILTIN_VPCONFLICTQ128,
29925 IX86_BUILTIN_VPCONFLICTD128,
29926 IX86_BUILTIN_VPCLZCNTQ128,
29927 IX86_BUILTIN_VPCLZCNTD128,
29928 IX86_BUILTIN_UNPCKLPS128_MASK,
29929 IX86_BUILTIN_ALIGND256,
29930 IX86_BUILTIN_ALIGNQ256,
29931 IX86_BUILTIN_ALIGND128,
29932 IX86_BUILTIN_ALIGNQ128,
29933 IX86_BUILTIN_CVTPS2PH256_MASK,
29934 IX86_BUILTIN_CVTPS2PH_MASK,
29935 IX86_BUILTIN_CVTPH2PS_MASK,
29936 IX86_BUILTIN_CVTPH2PS256_MASK,
29937 IX86_BUILTIN_PUNPCKHDQ128_MASK,
29938 IX86_BUILTIN_PUNPCKHDQ256_MASK,
29939 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
29940 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
29941 IX86_BUILTIN_PUNPCKLDQ128_MASK,
29942 IX86_BUILTIN_PUNPCKLDQ256_MASK,
29943 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
29944 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
29945 IX86_BUILTIN_PUNPCKHBW128_MASK,
29946 IX86_BUILTIN_PUNPCKHBW256_MASK,
29947 IX86_BUILTIN_PUNPCKHWD128_MASK,
29948 IX86_BUILTIN_PUNPCKHWD256_MASK,
29949 IX86_BUILTIN_PUNPCKLBW128_MASK,
29950 IX86_BUILTIN_PUNPCKLBW256_MASK,
29951 IX86_BUILTIN_PUNPCKLWD128_MASK,
29952 IX86_BUILTIN_PUNPCKLWD256_MASK,
29953 IX86_BUILTIN_PSLLVV16HI,
29954 IX86_BUILTIN_PSLLVV8HI,
29955 IX86_BUILTIN_PACKSSDW256_MASK,
29956 IX86_BUILTIN_PACKSSDW128_MASK,
29957 IX86_BUILTIN_PACKUSDW256_MASK,
29958 IX86_BUILTIN_PACKUSDW128_MASK,
29959 IX86_BUILTIN_PAVGB256_MASK,
29960 IX86_BUILTIN_PAVGW256_MASK,
29961 IX86_BUILTIN_PAVGB128_MASK,
29962 IX86_BUILTIN_PAVGW128_MASK,
29963 IX86_BUILTIN_VPERMVARSF256_MASK,
29964 IX86_BUILTIN_VPERMVARDF256_MASK,
29965 IX86_BUILTIN_VPERMDF256_MASK,
29966 IX86_BUILTIN_PABSB256_MASK,
29967 IX86_BUILTIN_PABSB128_MASK,
29968 IX86_BUILTIN_PABSW256_MASK,
29969 IX86_BUILTIN_PABSW128_MASK,
29970 IX86_BUILTIN_VPERMILVARPD_MASK,
29971 IX86_BUILTIN_VPERMILVARPS_MASK,
29972 IX86_BUILTIN_VPERMILVARPD256_MASK,
29973 IX86_BUILTIN_VPERMILVARPS256_MASK,
29974 IX86_BUILTIN_VPERMILPD_MASK,
29975 IX86_BUILTIN_VPERMILPS_MASK,
29976 IX86_BUILTIN_VPERMILPD256_MASK,
29977 IX86_BUILTIN_VPERMILPS256_MASK,
29978 IX86_BUILTIN_BLENDMQ256,
29979 IX86_BUILTIN_BLENDMD256,
29980 IX86_BUILTIN_BLENDMPD256,
29981 IX86_BUILTIN_BLENDMPS256,
29982 IX86_BUILTIN_BLENDMQ128,
29983 IX86_BUILTIN_BLENDMD128,
29984 IX86_BUILTIN_BLENDMPD128,
29985 IX86_BUILTIN_BLENDMPS128,
29986 IX86_BUILTIN_BLENDMW256,
29987 IX86_BUILTIN_BLENDMB256,
29988 IX86_BUILTIN_BLENDMW128,
29989 IX86_BUILTIN_BLENDMB128,
29990 IX86_BUILTIN_PMULLD256_MASK,
29991 IX86_BUILTIN_PMULLD128_MASK,
29992 IX86_BUILTIN_PMULUDQ256_MASK,
29993 IX86_BUILTIN_PMULDQ256_MASK,
29994 IX86_BUILTIN_PMULDQ128_MASK,
29995 IX86_BUILTIN_PMULUDQ128_MASK,
29996 IX86_BUILTIN_CVTPD2PS256_MASK,
29997 IX86_BUILTIN_CVTPD2PS_MASK,
29998 IX86_BUILTIN_VPERMVARSI256_MASK,
29999 IX86_BUILTIN_VPERMVARDI256_MASK,
30000 IX86_BUILTIN_VPERMDI256_MASK,
30001 IX86_BUILTIN_CMPQ256,
30002 IX86_BUILTIN_CMPD256,
30003 IX86_BUILTIN_UCMPQ256,
30004 IX86_BUILTIN_UCMPD256,
30005 IX86_BUILTIN_CMPB256,
30006 IX86_BUILTIN_CMPW256,
30007 IX86_BUILTIN_UCMPB256,
30008 IX86_BUILTIN_UCMPW256,
30009 IX86_BUILTIN_CMPPD256_MASK,
30010 IX86_BUILTIN_CMPPS256_MASK,
30011 IX86_BUILTIN_CMPQ128,
30012 IX86_BUILTIN_CMPD128,
30013 IX86_BUILTIN_UCMPQ128,
30014 IX86_BUILTIN_UCMPD128,
30015 IX86_BUILTIN_CMPB128,
30016 IX86_BUILTIN_CMPW128,
30017 IX86_BUILTIN_UCMPB128,
30018 IX86_BUILTIN_UCMPW128,
30019 IX86_BUILTIN_CMPPD128_MASK,
30020 IX86_BUILTIN_CMPPS128_MASK,
30022 IX86_BUILTIN_GATHER3SIV8SF,
30023 IX86_BUILTIN_GATHER3SIV4SF,
30024 IX86_BUILTIN_GATHER3SIV4DF,
30025 IX86_BUILTIN_GATHER3SIV2DF,
30026 IX86_BUILTIN_GATHER3DIV8SF,
30027 IX86_BUILTIN_GATHER3DIV4SF,
30028 IX86_BUILTIN_GATHER3DIV4DF,
30029 IX86_BUILTIN_GATHER3DIV2DF,
30030 IX86_BUILTIN_GATHER3SIV8SI,
30031 IX86_BUILTIN_GATHER3SIV4SI,
30032 IX86_BUILTIN_GATHER3SIV4DI,
30033 IX86_BUILTIN_GATHER3SIV2DI,
30034 IX86_BUILTIN_GATHER3DIV8SI,
30035 IX86_BUILTIN_GATHER3DIV4SI,
30036 IX86_BUILTIN_GATHER3DIV4DI,
30037 IX86_BUILTIN_GATHER3DIV2DI,
30038 IX86_BUILTIN_SCATTERSIV8SF,
30039 IX86_BUILTIN_SCATTERSIV4SF,
30040 IX86_BUILTIN_SCATTERSIV4DF,
30041 IX86_BUILTIN_SCATTERSIV2DF,
30042 IX86_BUILTIN_SCATTERDIV8SF,
30043 IX86_BUILTIN_SCATTERDIV4SF,
30044 IX86_BUILTIN_SCATTERDIV4DF,
30045 IX86_BUILTIN_SCATTERDIV2DF,
30046 IX86_BUILTIN_SCATTERSIV8SI,
30047 IX86_BUILTIN_SCATTERSIV4SI,
30048 IX86_BUILTIN_SCATTERSIV4DI,
30049 IX86_BUILTIN_SCATTERSIV2DI,
30050 IX86_BUILTIN_SCATTERDIV8SI,
30051 IX86_BUILTIN_SCATTERDIV4SI,
30052 IX86_BUILTIN_SCATTERDIV4DI,
30053 IX86_BUILTIN_SCATTERDIV2DI,
30056 IX86_BUILTIN_RANGESD128,
30057 IX86_BUILTIN_RANGESS128,
30058 IX86_BUILTIN_KUNPCKWD,
30059 IX86_BUILTIN_KUNPCKDQ,
30060 IX86_BUILTIN_BROADCASTF32x2_512,
30061 IX86_BUILTIN_BROADCASTI32x2_512,
30062 IX86_BUILTIN_BROADCASTF64X2_512,
30063 IX86_BUILTIN_BROADCASTI64X2_512,
30064 IX86_BUILTIN_BROADCASTF32X8_512,
30065 IX86_BUILTIN_BROADCASTI32X8_512,
30066 IX86_BUILTIN_EXTRACTF64X2_512,
30067 IX86_BUILTIN_EXTRACTF32X8,
30068 IX86_BUILTIN_EXTRACTI64X2_512,
30069 IX86_BUILTIN_EXTRACTI32X8,
30070 IX86_BUILTIN_REDUCEPD512_MASK,
30071 IX86_BUILTIN_REDUCEPS512_MASK,
30072 IX86_BUILTIN_PMULLQ512,
30073 IX86_BUILTIN_XORPD512,
30074 IX86_BUILTIN_XORPS512,
30075 IX86_BUILTIN_ORPD512,
30076 IX86_BUILTIN_ORPS512,
30077 IX86_BUILTIN_ANDPD512,
30078 IX86_BUILTIN_ANDPS512,
30079 IX86_BUILTIN_ANDNPD512,
30080 IX86_BUILTIN_ANDNPS512,
30081 IX86_BUILTIN_INSERTF32X8,
30082 IX86_BUILTIN_INSERTI32X8,
30083 IX86_BUILTIN_INSERTF64X2_512,
30084 IX86_BUILTIN_INSERTI64X2_512,
30085 IX86_BUILTIN_FPCLASSPD512,
30086 IX86_BUILTIN_FPCLASSPS512,
30087 IX86_BUILTIN_CVTD2MASK512,
30088 IX86_BUILTIN_CVTQ2MASK512,
30089 IX86_BUILTIN_CVTMASK2D512,
30090 IX86_BUILTIN_CVTMASK2Q512,
30091 IX86_BUILTIN_CVTPD2QQ512,
30092 IX86_BUILTIN_CVTPS2QQ512,
30093 IX86_BUILTIN_CVTPD2UQQ512,
30094 IX86_BUILTIN_CVTPS2UQQ512,
30095 IX86_BUILTIN_CVTQQ2PS512,
30096 IX86_BUILTIN_CVTUQQ2PS512,
30097 IX86_BUILTIN_CVTQQ2PD512,
30098 IX86_BUILTIN_CVTUQQ2PD512,
30099 IX86_BUILTIN_CVTTPS2QQ512,
30100 IX86_BUILTIN_CVTTPS2UQQ512,
30101 IX86_BUILTIN_CVTTPD2QQ512,
30102 IX86_BUILTIN_CVTTPD2UQQ512,
30103 IX86_BUILTIN_RANGEPS512,
30104 IX86_BUILTIN_RANGEPD512,
30107 IX86_BUILTIN_PACKUSDW512,
30108 IX86_BUILTIN_PACKSSDW512,
30109 IX86_BUILTIN_LOADDQUHI512_MASK,
30110 IX86_BUILTIN_LOADDQUQI512_MASK,
30111 IX86_BUILTIN_PSLLDQ512,
30112 IX86_BUILTIN_PSRLDQ512,
30113 IX86_BUILTIN_STOREDQUHI512_MASK,
30114 IX86_BUILTIN_STOREDQUQI512_MASK,
30115 IX86_BUILTIN_PALIGNR512,
30116 IX86_BUILTIN_PALIGNR512_MASK,
30117 IX86_BUILTIN_MOVDQUHI512_MASK,
30118 IX86_BUILTIN_MOVDQUQI512_MASK,
30119 IX86_BUILTIN_PSADBW512,
30120 IX86_BUILTIN_DBPSADBW512,
30121 IX86_BUILTIN_PBROADCASTB512,
30122 IX86_BUILTIN_PBROADCASTB512_GPR,
30123 IX86_BUILTIN_PBROADCASTW512,
30124 IX86_BUILTIN_PBROADCASTW512_GPR,
30125 IX86_BUILTIN_PMOVSXBW512_MASK,
30126 IX86_BUILTIN_PMOVZXBW512_MASK,
30127 IX86_BUILTIN_VPERMVARHI512_MASK,
30128 IX86_BUILTIN_VPERMT2VARHI512,
30129 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
30130 IX86_BUILTIN_VPERMI2VARHI512,
30131 IX86_BUILTIN_PAVGB512,
30132 IX86_BUILTIN_PAVGW512,
30133 IX86_BUILTIN_PADDB512,
30134 IX86_BUILTIN_PSUBB512,
30135 IX86_BUILTIN_PSUBSB512,
30136 IX86_BUILTIN_PADDSB512,
30137 IX86_BUILTIN_PSUBUSB512,
30138 IX86_BUILTIN_PADDUSB512,
30139 IX86_BUILTIN_PSUBW512,
30140 IX86_BUILTIN_PADDW512,
30141 IX86_BUILTIN_PSUBSW512,
30142 IX86_BUILTIN_PADDSW512,
30143 IX86_BUILTIN_PSUBUSW512,
30144 IX86_BUILTIN_PADDUSW512,
30145 IX86_BUILTIN_PMAXUW512,
30146 IX86_BUILTIN_PMAXSW512,
30147 IX86_BUILTIN_PMINUW512,
30148 IX86_BUILTIN_PMINSW512,
30149 IX86_BUILTIN_PMAXUB512,
30150 IX86_BUILTIN_PMAXSB512,
30151 IX86_BUILTIN_PMINUB512,
30152 IX86_BUILTIN_PMINSB512,
30153 IX86_BUILTIN_PMOVWB512,
30154 IX86_BUILTIN_PMOVSWB512,
30155 IX86_BUILTIN_PMOVUSWB512,
30156 IX86_BUILTIN_PMULHRSW512_MASK,
30157 IX86_BUILTIN_PMULHUW512_MASK,
30158 IX86_BUILTIN_PMULHW512_MASK,
30159 IX86_BUILTIN_PMULLW512_MASK,
30160 IX86_BUILTIN_PSLLWI512_MASK,
30161 IX86_BUILTIN_PSLLW512_MASK,
30162 IX86_BUILTIN_PACKSSWB512,
30163 IX86_BUILTIN_PACKUSWB512,
30164 IX86_BUILTIN_PSRAVV32HI,
30165 IX86_BUILTIN_PMADDUBSW512_MASK,
30166 IX86_BUILTIN_PMADDWD512_MASK,
30167 IX86_BUILTIN_PSRLVV32HI,
30168 IX86_BUILTIN_PUNPCKHBW512,
30169 IX86_BUILTIN_PUNPCKHWD512,
30170 IX86_BUILTIN_PUNPCKLBW512,
30171 IX86_BUILTIN_PUNPCKLWD512,
30172 IX86_BUILTIN_PSHUFB512,
30173 IX86_BUILTIN_PSHUFHW512,
30174 IX86_BUILTIN_PSHUFLW512,
30175 IX86_BUILTIN_PSRAWI512,
30176 IX86_BUILTIN_PSRAW512,
30177 IX86_BUILTIN_PSRLWI512,
30178 IX86_BUILTIN_PSRLW512,
30179 IX86_BUILTIN_CVTB2MASK512,
30180 IX86_BUILTIN_CVTW2MASK512,
30181 IX86_BUILTIN_CVTMASK2B512,
30182 IX86_BUILTIN_CVTMASK2W512,
30183 IX86_BUILTIN_PCMPEQB512_MASK,
30184 IX86_BUILTIN_PCMPEQW512_MASK,
30185 IX86_BUILTIN_PCMPGTB512_MASK,
30186 IX86_BUILTIN_PCMPGTW512_MASK,
30187 IX86_BUILTIN_PTESTMB512,
30188 IX86_BUILTIN_PTESTMW512,
30189 IX86_BUILTIN_PTESTNMB512,
30190 IX86_BUILTIN_PTESTNMW512,
30191 IX86_BUILTIN_PSLLVV32HI,
30192 IX86_BUILTIN_PABSB512,
30193 IX86_BUILTIN_PABSW512,
30194 IX86_BUILTIN_BLENDMW512,
30195 IX86_BUILTIN_BLENDMB512,
30196 IX86_BUILTIN_CMPB512,
30197 IX86_BUILTIN_CMPW512,
30198 IX86_BUILTIN_UCMPB512,
30199 IX86_BUILTIN_UCMPW512,
30201 /* Alternate 4 and 8 element gather/scatter for the vectorizer
30202 where all operands are 32-byte or 64-byte wide respectively. */
30203 IX86_BUILTIN_GATHERALTSIV4DF,
30204 IX86_BUILTIN_GATHERALTDIV8SF,
30205 IX86_BUILTIN_GATHERALTSIV4DI,
30206 IX86_BUILTIN_GATHERALTDIV8SI,
30207 IX86_BUILTIN_GATHER3ALTDIV16SF,
30208 IX86_BUILTIN_GATHER3ALTDIV16SI,
30209 IX86_BUILTIN_GATHER3ALTSIV4DF,
30210 IX86_BUILTIN_GATHER3ALTDIV8SF,
30211 IX86_BUILTIN_GATHER3ALTSIV4DI,
30212 IX86_BUILTIN_GATHER3ALTDIV8SI,
30213 IX86_BUILTIN_GATHER3ALTSIV8DF,
30214 IX86_BUILTIN_GATHER3ALTSIV8DI,
30215 IX86_BUILTIN_GATHER3DIV16SF,
30216 IX86_BUILTIN_GATHER3DIV16SI,
30217 IX86_BUILTIN_GATHER3DIV8DF,
30218 IX86_BUILTIN_GATHER3DIV8DI,
30219 IX86_BUILTIN_GATHER3SIV16SF,
30220 IX86_BUILTIN_GATHER3SIV16SI,
30221 IX86_BUILTIN_GATHER3SIV8DF,
30222 IX86_BUILTIN_GATHER3SIV8DI,
30223 IX86_BUILTIN_SCATTERDIV16SF,
30224 IX86_BUILTIN_SCATTERDIV16SI,
30225 IX86_BUILTIN_SCATTERDIV8DF,
30226 IX86_BUILTIN_SCATTERDIV8DI,
30227 IX86_BUILTIN_SCATTERSIV16SF,
30228 IX86_BUILTIN_SCATTERSIV16SI,
30229 IX86_BUILTIN_SCATTERSIV8DF,
30230 IX86_BUILTIN_SCATTERSIV8DI,
30233 IX86_BUILTIN_GATHERPFQPD,
30234 IX86_BUILTIN_GATHERPFDPS,
30235 IX86_BUILTIN_GATHERPFDPD,
30236 IX86_BUILTIN_GATHERPFQPS,
30237 IX86_BUILTIN_SCATTERPFDPD,
30238 IX86_BUILTIN_SCATTERPFDPS,
30239 IX86_BUILTIN_SCATTERPFQPD,
30240 IX86_BUILTIN_SCATTERPFQPS,
30243 IX86_BUILTIN_EXP2PD_MASK,
30244 IX86_BUILTIN_EXP2PS_MASK,
30245 IX86_BUILTIN_EXP2PS,
30246 IX86_BUILTIN_RCP28PD,
30247 IX86_BUILTIN_RCP28PS,
30248 IX86_BUILTIN_RCP28SD,
30249 IX86_BUILTIN_RCP28SS,
30250 IX86_BUILTIN_RSQRT28PD,
30251 IX86_BUILTIN_RSQRT28PS,
30252 IX86_BUILTIN_RSQRT28SD,
30253 IX86_BUILTIN_RSQRT28SS,
30256 IX86_BUILTIN_VPMADD52LUQ512,
30257 IX86_BUILTIN_VPMADD52HUQ512,
30258 IX86_BUILTIN_VPMADD52LUQ256,
30259 IX86_BUILTIN_VPMADD52HUQ256,
30260 IX86_BUILTIN_VPMADD52LUQ128,
30261 IX86_BUILTIN_VPMADD52HUQ128,
30262 IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
30263 IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
30264 IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
30265 IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
30266 IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
30267 IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
30270 IX86_BUILTIN_VPMULTISHIFTQB512,
30271 IX86_BUILTIN_VPMULTISHIFTQB256,
30272 IX86_BUILTIN_VPMULTISHIFTQB128,
30273 IX86_BUILTIN_VPERMVARQI512_MASK,
30274 IX86_BUILTIN_VPERMT2VARQI512,
30275 IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
30276 IX86_BUILTIN_VPERMI2VARQI512,
30277 IX86_BUILTIN_VPERMVARQI256_MASK,
30278 IX86_BUILTIN_VPERMVARQI128_MASK,
30279 IX86_BUILTIN_VPERMT2VARQI256,
30280 IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
30281 IX86_BUILTIN_VPERMT2VARQI128,
30282 IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
30283 IX86_BUILTIN_VPERMI2VARQI256,
30284 IX86_BUILTIN_VPERMI2VARQI128,
30286 /* SHA builtins. */
30287 IX86_BUILTIN_SHA1MSG1,
30288 IX86_BUILTIN_SHA1MSG2,
30289 IX86_BUILTIN_SHA1NEXTE,
30290 IX86_BUILTIN_SHA1RNDS4,
30291 IX86_BUILTIN_SHA256MSG1,
30292 IX86_BUILTIN_SHA256MSG2,
30293 IX86_BUILTIN_SHA256RNDS2,
30295 /* CLWB instructions. */
30298 /* PCOMMIT instructions. */
30299 IX86_BUILTIN_PCOMMIT,
30301 /* CLFLUSHOPT instructions. */
30302 IX86_BUILTIN_CLFLUSHOPT,
30304 /* TFmode support builtins. */
30306 IX86_BUILTIN_HUGE_VALQ,
30307 IX86_BUILTIN_FABSQ,
30308 IX86_BUILTIN_COPYSIGNQ,
30310 /* Vectorizer support builtins. */
30311 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
30312 IX86_BUILTIN_CPYSGNPS,
30313 IX86_BUILTIN_CPYSGNPD,
30314 IX86_BUILTIN_CPYSGNPS256,
30315 IX86_BUILTIN_CPYSGNPS512,
30316 IX86_BUILTIN_CPYSGNPD256,
30317 IX86_BUILTIN_CPYSGNPD512,
30318 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
30319 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
30322 /* FMA4 instructions. */
30323 IX86_BUILTIN_VFMADDSS,
30324 IX86_BUILTIN_VFMADDSD,
30325 IX86_BUILTIN_VFMADDPS,
30326 IX86_BUILTIN_VFMADDPD,
30327 IX86_BUILTIN_VFMADDPS256,
30328 IX86_BUILTIN_VFMADDPD256,
30329 IX86_BUILTIN_VFMADDSUBPS,
30330 IX86_BUILTIN_VFMADDSUBPD,
30331 IX86_BUILTIN_VFMADDSUBPS256,
30332 IX86_BUILTIN_VFMADDSUBPD256,
30334 /* FMA3 instructions. */
30335 IX86_BUILTIN_VFMADDSS3,
30336 IX86_BUILTIN_VFMADDSD3,
30338 /* XOP instructions. */
30339 IX86_BUILTIN_VPCMOV,
30340 IX86_BUILTIN_VPCMOV_V2DI,
30341 IX86_BUILTIN_VPCMOV_V4SI,
30342 IX86_BUILTIN_VPCMOV_V8HI,
30343 IX86_BUILTIN_VPCMOV_V16QI,
30344 IX86_BUILTIN_VPCMOV_V4SF,
30345 IX86_BUILTIN_VPCMOV_V2DF,
30346 IX86_BUILTIN_VPCMOV256,
30347 IX86_BUILTIN_VPCMOV_V4DI256,
30348 IX86_BUILTIN_VPCMOV_V8SI256,
30349 IX86_BUILTIN_VPCMOV_V16HI256,
30350 IX86_BUILTIN_VPCMOV_V32QI256,
30351 IX86_BUILTIN_VPCMOV_V8SF256,
30352 IX86_BUILTIN_VPCMOV_V4DF256,
30354 IX86_BUILTIN_VPPERM,
30356 IX86_BUILTIN_VPMACSSWW,
30357 IX86_BUILTIN_VPMACSWW,
30358 IX86_BUILTIN_VPMACSSWD,
30359 IX86_BUILTIN_VPMACSWD,
30360 IX86_BUILTIN_VPMACSSDD,
30361 IX86_BUILTIN_VPMACSDD,
30362 IX86_BUILTIN_VPMACSSDQL,
30363 IX86_BUILTIN_VPMACSSDQH,
30364 IX86_BUILTIN_VPMACSDQL,
30365 IX86_BUILTIN_VPMACSDQH,
30366 IX86_BUILTIN_VPMADCSSWD,
30367 IX86_BUILTIN_VPMADCSWD,
30369 IX86_BUILTIN_VPHADDBW,
30370 IX86_BUILTIN_VPHADDBD,
30371 IX86_BUILTIN_VPHADDBQ,
30372 IX86_BUILTIN_VPHADDWD,
30373 IX86_BUILTIN_VPHADDWQ,
30374 IX86_BUILTIN_VPHADDDQ,
30375 IX86_BUILTIN_VPHADDUBW,
30376 IX86_BUILTIN_VPHADDUBD,
30377 IX86_BUILTIN_VPHADDUBQ,
30378 IX86_BUILTIN_VPHADDUWD,
30379 IX86_BUILTIN_VPHADDUWQ,
30380 IX86_BUILTIN_VPHADDUDQ,
30381 IX86_BUILTIN_VPHSUBBW,
30382 IX86_BUILTIN_VPHSUBWD,
30383 IX86_BUILTIN_VPHSUBDQ,
30385 IX86_BUILTIN_VPROTB,
30386 IX86_BUILTIN_VPROTW,
30387 IX86_BUILTIN_VPROTD,
30388 IX86_BUILTIN_VPROTQ,
30389 IX86_BUILTIN_VPROTB_IMM,
30390 IX86_BUILTIN_VPROTW_IMM,
30391 IX86_BUILTIN_VPROTD_IMM,
30392 IX86_BUILTIN_VPROTQ_IMM,
30394 IX86_BUILTIN_VPSHLB,
30395 IX86_BUILTIN_VPSHLW,
30396 IX86_BUILTIN_VPSHLD,
30397 IX86_BUILTIN_VPSHLQ,
30398 IX86_BUILTIN_VPSHAB,
30399 IX86_BUILTIN_VPSHAW,
30400 IX86_BUILTIN_VPSHAD,
30401 IX86_BUILTIN_VPSHAQ,
30403 IX86_BUILTIN_VFRCZSS,
30404 IX86_BUILTIN_VFRCZSD,
30405 IX86_BUILTIN_VFRCZPS,
30406 IX86_BUILTIN_VFRCZPD,
30407 IX86_BUILTIN_VFRCZPS256,
30408 IX86_BUILTIN_VFRCZPD256,
30410 IX86_BUILTIN_VPCOMEQUB,
30411 IX86_BUILTIN_VPCOMNEUB,
30412 IX86_BUILTIN_VPCOMLTUB,
30413 IX86_BUILTIN_VPCOMLEUB,
30414 IX86_BUILTIN_VPCOMGTUB,
30415 IX86_BUILTIN_VPCOMGEUB,
30416 IX86_BUILTIN_VPCOMFALSEUB,
30417 IX86_BUILTIN_VPCOMTRUEUB,
30419 IX86_BUILTIN_VPCOMEQUW,
30420 IX86_BUILTIN_VPCOMNEUW,
30421 IX86_BUILTIN_VPCOMLTUW,
30422 IX86_BUILTIN_VPCOMLEUW,
30423 IX86_BUILTIN_VPCOMGTUW,
30424 IX86_BUILTIN_VPCOMGEUW,
30425 IX86_BUILTIN_VPCOMFALSEUW,
30426 IX86_BUILTIN_VPCOMTRUEUW,
30428 IX86_BUILTIN_VPCOMEQUD,
30429 IX86_BUILTIN_VPCOMNEUD,
30430 IX86_BUILTIN_VPCOMLTUD,
30431 IX86_BUILTIN_VPCOMLEUD,
30432 IX86_BUILTIN_VPCOMGTUD,
30433 IX86_BUILTIN_VPCOMGEUD,
30434 IX86_BUILTIN_VPCOMFALSEUD,
30435 IX86_BUILTIN_VPCOMTRUEUD,
30437 IX86_BUILTIN_VPCOMEQUQ,
30438 IX86_BUILTIN_VPCOMNEUQ,
30439 IX86_BUILTIN_VPCOMLTUQ,
30440 IX86_BUILTIN_VPCOMLEUQ,
30441 IX86_BUILTIN_VPCOMGTUQ,
30442 IX86_BUILTIN_VPCOMGEUQ,
30443 IX86_BUILTIN_VPCOMFALSEUQ,
30444 IX86_BUILTIN_VPCOMTRUEUQ,
30446 IX86_BUILTIN_VPCOMEQB,
30447 IX86_BUILTIN_VPCOMNEB,
30448 IX86_BUILTIN_VPCOMLTB,
30449 IX86_BUILTIN_VPCOMLEB,
30450 IX86_BUILTIN_VPCOMGTB,
30451 IX86_BUILTIN_VPCOMGEB,
30452 IX86_BUILTIN_VPCOMFALSEB,
30453 IX86_BUILTIN_VPCOMTRUEB,
30455 IX86_BUILTIN_VPCOMEQW,
30456 IX86_BUILTIN_VPCOMNEW,
30457 IX86_BUILTIN_VPCOMLTW,
30458 IX86_BUILTIN_VPCOMLEW,
30459 IX86_BUILTIN_VPCOMGTW,
30460 IX86_BUILTIN_VPCOMGEW,
30461 IX86_BUILTIN_VPCOMFALSEW,
30462 IX86_BUILTIN_VPCOMTRUEW,
30464 IX86_BUILTIN_VPCOMEQD,
30465 IX86_BUILTIN_VPCOMNED,
30466 IX86_BUILTIN_VPCOMLTD,
30467 IX86_BUILTIN_VPCOMLED,
30468 IX86_BUILTIN_VPCOMGTD,
30469 IX86_BUILTIN_VPCOMGED,
30470 IX86_BUILTIN_VPCOMFALSED,
30471 IX86_BUILTIN_VPCOMTRUED,
30473 IX86_BUILTIN_VPCOMEQQ,
30474 IX86_BUILTIN_VPCOMNEQ,
30475 IX86_BUILTIN_VPCOMLTQ,
30476 IX86_BUILTIN_VPCOMLEQ,
30477 IX86_BUILTIN_VPCOMGTQ,
30478 IX86_BUILTIN_VPCOMGEQ,
30479 IX86_BUILTIN_VPCOMFALSEQ,
30480 IX86_BUILTIN_VPCOMTRUEQ,
30482 /* LWP instructions. */
30483 IX86_BUILTIN_LLWPCB,
30484 IX86_BUILTIN_SLWPCB,
30485 IX86_BUILTIN_LWPVAL32,
30486 IX86_BUILTIN_LWPVAL64,
30487 IX86_BUILTIN_LWPINS32,
30488 IX86_BUILTIN_LWPINS64,
30493 IX86_BUILTIN_XBEGIN,
30495 IX86_BUILTIN_XABORT,
30496 IX86_BUILTIN_XTEST,
30499 IX86_BUILTIN_BNDMK,
30500 IX86_BUILTIN_BNDSTX,
30501 IX86_BUILTIN_BNDLDX,
30502 IX86_BUILTIN_BNDCL,
30503 IX86_BUILTIN_BNDCU,
30504 IX86_BUILTIN_BNDRET,
30505 IX86_BUILTIN_BNDNARROW,
30506 IX86_BUILTIN_BNDINT,
30507 IX86_BUILTIN_SIZEOF,
30508 IX86_BUILTIN_BNDLOWER,
30509 IX86_BUILTIN_BNDUPPER,
30511 /* BMI instructions. */
30512 IX86_BUILTIN_BEXTR32,
30513 IX86_BUILTIN_BEXTR64,
30516 /* TBM instructions. */
30517 IX86_BUILTIN_BEXTRI32,
30518 IX86_BUILTIN_BEXTRI64,
30520 /* BMI2 instructions. */
30521 IX86_BUILTIN_BZHI32,
30522 IX86_BUILTIN_BZHI64,
30523 IX86_BUILTIN_PDEP32,
30524 IX86_BUILTIN_PDEP64,
30525 IX86_BUILTIN_PEXT32,
30526 IX86_BUILTIN_PEXT64,
30528 /* ADX instructions. */
30529 IX86_BUILTIN_ADDCARRYX32,
30530 IX86_BUILTIN_ADDCARRYX64,
30532 /* SBB instructions. */
30533 IX86_BUILTIN_SBB32,
30534 IX86_BUILTIN_SBB64,
30536 /* FSGSBASE instructions. */
30537 IX86_BUILTIN_RDFSBASE32,
30538 IX86_BUILTIN_RDFSBASE64,
30539 IX86_BUILTIN_RDGSBASE32,
30540 IX86_BUILTIN_RDGSBASE64,
30541 IX86_BUILTIN_WRFSBASE32,
30542 IX86_BUILTIN_WRFSBASE64,
30543 IX86_BUILTIN_WRGSBASE32,
30544 IX86_BUILTIN_WRGSBASE64,
30546 /* RDRND instructions. */
30547 IX86_BUILTIN_RDRAND16_STEP,
30548 IX86_BUILTIN_RDRAND32_STEP,
30549 IX86_BUILTIN_RDRAND64_STEP,
30551 /* RDSEED instructions. */
30552 IX86_BUILTIN_RDSEED16_STEP,
30553 IX86_BUILTIN_RDSEED32_STEP,
30554 IX86_BUILTIN_RDSEED64_STEP,
30556 /* F16C instructions. */
30557 IX86_BUILTIN_CVTPH2PS,
30558 IX86_BUILTIN_CVTPH2PS256,
30559 IX86_BUILTIN_CVTPS2PH,
30560 IX86_BUILTIN_CVTPS2PH256,
30562 /* CFString built-in for darwin */
30563 IX86_BUILTIN_CFSTRING,
30565 /* Builtins to get CPU type and supported features. */
30566 IX86_BUILTIN_CPU_INIT,
30567 IX86_BUILTIN_CPU_IS,
30568 IX86_BUILTIN_CPU_SUPPORTS,
30570 /* Read/write FLAGS register built-ins. */
30571 IX86_BUILTIN_READ_FLAGS,
30572 IX86_BUILTIN_WRITE_FLAGS,
30577 /* Table for the ix86 builtin decls. */
30578 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
30580 /* Table of all of the builtin functions that are possible with different ISA's
30581 but are waiting to be built until a function is declared to use that
30583 struct builtin_isa {
30584 const char *name; /* function name */
30585 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
30586 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
30587 bool const_p; /* true if the declaration is constant */
30588 bool leaf_p; /* true if the declaration has leaf attribute */
30589 bool nothrow_p; /* true if the declaration has nothrow attribute */
30590 bool set_and_not_built_p;
30593 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
30596 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
30597 of which isa_flags to use in the ix86_builtins_isa array. Stores the
30598 function decl in the ix86_builtins array. Returns the function decl or
30599 NULL_TREE, if the builtin was not added.
30601 If the front end has a special hook for builtin functions, delay adding
30602 builtin functions that aren't in the current ISA until the ISA is changed
30603 with function specific optimization. Doing so, can save about 300K for the
30604 default compiler. When the builtin is expanded, check at that time whether
30607 If the front end doesn't have a special hook, record all builtins, even if
30608 it isn't an instruction set in the current ISA in case the user uses
30609 function specific options for a different ISA, so that we don't get scope
30610 errors if a builtin is added in the middle of a function scope. */
30613 def_builtin (HOST_WIDE_INT mask, const char *name,
30614 enum ix86_builtin_func_type tcode,
30615 enum ix86_builtins code)
30617 tree decl = NULL_TREE;
30619 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
30621 ix86_builtins_isa[(int) code].isa = mask;
30623 mask &= ~OPTION_MASK_ISA_64BIT;
30625 || (mask & ix86_isa_flags) != 0
30626 || (lang_hooks.builtin_function
30627 == lang_hooks.builtin_function_ext_scope))
30630 tree type = ix86_get_builtin_func_type (tcode);
30631 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30633 ix86_builtins[(int) code] = decl;
30634 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30638 ix86_builtins[(int) code] = NULL_TREE;
30639 ix86_builtins_isa[(int) code].tcode = tcode;
30640 ix86_builtins_isa[(int) code].name = name;
30641 ix86_builtins_isa[(int) code].leaf_p = false;
30642 ix86_builtins_isa[(int) code].nothrow_p = false;
30643 ix86_builtins_isa[(int) code].const_p = false;
30644 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30651 /* Like def_builtin, but also marks the function decl "const". */
30654 def_builtin_const (HOST_WIDE_INT mask, const char *name,
30655 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30657 tree decl = def_builtin (mask, name, tcode, code);
30659 TREE_READONLY (decl) = 1;
30661 ix86_builtins_isa[(int) code].const_p = true;
30666 /* Add any new builtin functions for a given ISA that may not have been
30667 declared. This saves a bit of space compared to adding all of the
30668 declarations to the tree, even if we didn't use them. */
30671 ix86_add_new_builtins (HOST_WIDE_INT isa)
30674 tree saved_current_target_pragma = current_target_pragma;
30675 current_target_pragma = NULL_TREE;
30677 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
30679 if ((ix86_builtins_isa[i].isa & isa) != 0
30680 && ix86_builtins_isa[i].set_and_not_built_p)
30684 /* Don't define the builtin again. */
30685 ix86_builtins_isa[i].set_and_not_built_p = false;
30687 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
30688 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
30689 type, i, BUILT_IN_MD, NULL,
30692 ix86_builtins[i] = decl;
30693 if (ix86_builtins_isa[i].const_p)
30694 TREE_READONLY (decl) = 1;
30695 if (ix86_builtins_isa[i].leaf_p)
30696 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30698 if (ix86_builtins_isa[i].nothrow_p)
30699 TREE_NOTHROW (decl) = 1;
30703 current_target_pragma = saved_current_target_pragma;
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1
30712 struct builtin_description
30714 const HOST_WIDE_INT mask;
30715 const enum insn_code icode;
30716 const char *const name;
30717 const enum ix86_builtins code;
30718 const enum rtx_code comparison;
30722 static const struct builtin_description bdesc_comi[] =
30724 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
30725 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
30726 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
30727 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
30728 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
30729 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
30730 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
30731 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
30732 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
30733 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
30734 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
30735 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
30736 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
30737 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
30738 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
30739 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
30740 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
30741 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
30742 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
30743 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
30744 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
30745 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
30746 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
30747 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
30750 static const struct builtin_description bdesc_pcmpestr[] =
30753 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
30754 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
30755 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
30756 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
30757 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
30758 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
30759 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
30762 static const struct builtin_description bdesc_pcmpistr[] =
30765 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
30766 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
30767 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
30768 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
30769 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
30770 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
30771 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
30774 /* Special builtins with variable number of arguments. */
30775 static const struct builtin_description bdesc_special_args[] =
30777 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
30778 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
30779 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
30781 /* 80387 (for use internally for atomic compound assignment). */
30782 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
30783 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
30784 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
30785 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
30788 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30791 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30793 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
30794 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
30795 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
30796 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30797 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30798 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30799 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30800 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30801 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30803 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30804 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30805 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30806 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30807 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30808 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30809 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30810 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30813 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30814 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30815 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30817 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30818 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30819 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30820 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30822 /* SSE or 3DNow!A */
30823 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30824 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
30827 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30828 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30829 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30830 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
30831 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30832 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
30833 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
30834 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
30835 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
30836 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30838 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30839 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30842 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30845 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
30848 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30849 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30852 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
30853 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
30855 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30856 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30857 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30858 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
30859 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
30861 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30862 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30863 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30864 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30865 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30866 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
30867 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30869 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
30870 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30871 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30873 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
30874 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
30875 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
30876 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
30877 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
30878 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
30879 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
30880 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
30883 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
30884 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
30885 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
30886 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
30887 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
30888 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
30889 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
30890 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
30891 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
30894 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30895 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30896 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30897 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30898 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30899 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30900 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30901 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30902 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30903 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30904 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30905 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30906 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30907 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30908 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30909 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30910 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30911 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30912 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30913 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30914 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
30915 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
30916 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
30917 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
30918 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30919 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30920 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30921 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30922 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30923 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30924 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30925 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30926 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30927 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30942 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
30943 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
30944 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
30945 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
30946 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
30947 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
30950 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30951 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30952 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30953 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30954 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30955 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30956 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30957 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30960 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30961 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
30962 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
30965 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_SI },
30966 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_DI },
30967 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_SI },
30968 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_DI },
30971 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_HI },
30972 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_QI },
30973 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_SI },
30974 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_HI },
30975 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30976 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30977 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30978 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30979 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30980 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30981 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30982 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30983 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30984 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30985 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30986 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30987 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30988 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30989 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30990 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30991 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30992 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30993 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30994 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30995 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30996 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30997 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30998 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30999 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31000 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31001 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31002 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31003 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31004 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31005 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31006 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31007 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_HI },
31008 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_QI },
31009 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_SI },
31010 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_HI },
31011 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31012 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31013 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31014 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31015 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31016 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31017 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31018 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31019 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31020 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31021 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31022 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31023 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31024 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31025 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31026 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31027 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31028 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31029 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31030 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31031 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31032 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31033 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31034 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31035 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31036 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31037 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31038 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31039 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31040 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31041 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31042 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31043 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31044 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31045 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31046 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31047 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31048 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31049 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31050 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31051 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31052 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31053 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31054 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31055 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31056 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31057 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31058 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31059 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31060 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31061 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31062 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31063 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31064 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31067 { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
31070 /* Builtins with variable number of arguments. */
31071 static const struct builtin_description bdesc_args[] =
31073 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
31074 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
31075 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
31076 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31077 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31078 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31079 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31082 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31083 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31084 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31085 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31086 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31087 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31089 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31090 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31091 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31092 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31093 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31094 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31095 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31096 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31098 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31099 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31101 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31102 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31103 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31104 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31106 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31107 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31108 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31109 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31110 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31111 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31113 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31114 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31115 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31116 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31117 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
31118 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
31120 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31121 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
31122 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31124 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
31126 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31127 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31128 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31129 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31130 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31131 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31133 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31134 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31135 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31136 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31137 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31138 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31140 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31141 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31142 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31143 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31146 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31147 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31148 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31149 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31151 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31152 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31153 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31154 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31155 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31156 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31157 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31158 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31159 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31160 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31161 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31162 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31163 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31164 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31165 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31168 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31169 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31170 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31171 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31172 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31173 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31176 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
31177 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31178 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31179 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31180 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31181 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31182 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31183 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31184 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31185 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31186 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31187 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31189 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31191 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31192 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31193 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31194 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31195 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31196 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31197 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31198 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31200 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31201 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31202 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31203 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31204 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31205 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31206 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31207 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31208 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31209 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31210 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
31211 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31212 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31213 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31214 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31215 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31216 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31217 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31218 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31219 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31221 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31222 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31223 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31224 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31226 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31227 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31228 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31229 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31231 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31233 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31234 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31235 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31236 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31237 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31239 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
31240 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
31241 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
31243 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
31245 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31246 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31247 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31249 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
31250 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
31252 /* SSE MMX or 3Dnow!A */
31253 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31254 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31255 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31257 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31258 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31259 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31260 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31262 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
31263 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
31265 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
31268 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31270 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
31271 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
31272 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31273 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
31274 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
31276 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31277 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31278 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
31279 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31280 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31282 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
31284 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31285 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31286 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31287 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31289 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31290 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
31291 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31293 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31294 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31295 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31296 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31297 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31298 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31299 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31300 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31302 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31303 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31304 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31305 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31306 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
31307 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31308 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31309 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31310 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31311 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31312 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31313 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31314 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31315 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31316 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31317 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31318 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31319 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31320 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31321 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31323 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31324 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31325 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31326 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31328 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31329 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31330 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31331 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31333 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31335 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31336 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31337 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31339 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31341 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31342 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31343 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31344 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31345 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31346 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31347 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31348 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31350 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31351 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31352 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31353 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31354 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31355 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31356 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31357 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31359 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31360 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
31362 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31363 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31364 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31365 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31367 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31368 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31370 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31371 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31372 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31373 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31374 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31375 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31377 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31378 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31379 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31380 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31382 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31383 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31384 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31385 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31386 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31387 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31388 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31389 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31391 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31392 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31393 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31395 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31396 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
31398 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
31399 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31401 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
31403 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
31404 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
31405 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
31406 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
31408 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31409 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31410 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31411 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31412 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31413 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31414 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31416 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31417 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31418 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31419 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31420 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31421 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31422 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31424 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31425 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31426 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31427 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31429 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
31430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31433 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
/* MOVQ (xmm) — pattern sse2_movq128 is an SSE2 instruction, so gate the
   builtin on SSE2 like every other sse2_* entry in this table; gating it
   on plain SSE would expose a builtin whose insn cannot be emitted.  */
31435 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
/* SSE2 MMX — 64-bit paddq/psubq operate on MMX registers but were
   introduced with SSE2, hence the SSE2 ISA gate.  */
31438 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31439 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
/* SSE3 */
31442 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
31443 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31445 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31446 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31447 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31448 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31449 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31450 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31453 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31454 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
31455 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31456 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
31457 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31458 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31460 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31461 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31462 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31463 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31464 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31465 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31466 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31467 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31468 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31469 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31470 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31471 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31472 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
31473 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
31474 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31475 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31476 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31477 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31478 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31479 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31480 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31481 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31482 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31483 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31486 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
31487 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
31490 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31491 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31492 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
31493 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
31494 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31495 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31496 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31497 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
31498 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
31499 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
31501 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31502 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31503 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31504 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31505 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31506 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31507 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31508 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31509 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31510 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31511 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31512 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31513 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31515 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31516 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31517 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31518 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31519 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31520 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31521 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31522 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31523 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31524 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31525 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31526 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31529 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31530 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31531 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31532 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31534 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
31535 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
31536 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
31537 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
31539 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31540 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31542 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31543 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31545 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
31546 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
31547 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
31548 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
31550 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
31551 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
31553 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31554 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31556 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31557 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31558 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
/* SSE4.2 — pcmpgtq plus the CRC32 accumulate instructions (the 64-bit
   form additionally requires a 64-bit target).  */
31561 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31562 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
31563 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
31564 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31565 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31568 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
31569 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
31570 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
31571 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31574 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
31575 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31577 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31578 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31579 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31580 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31583 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
31586 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31587 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31588 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31589 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31590 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31591 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31592 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31593 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31594 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31595 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31596 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31597 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31598 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31599 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31600 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31601 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31602 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31603 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31604 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31605 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31606 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31607 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31608 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31609 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31610 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31611 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31613 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
31614 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
31615 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
31616 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31618 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31619 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31620 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
31621 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
31622 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31623 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31624 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31625 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31626 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31627 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31628 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31629 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31630 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31631 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
31632 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
31633 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
31634 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
31635 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
31636 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
31637 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31638 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
31639 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31640 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31641 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31642 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31643 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31644 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31645 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31646 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31647 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31648 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31649 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
31650 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
31651 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
31653 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31654 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31655 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31657 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31658 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31659 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31660 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31661 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31663 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31665 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31666 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31668 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
31669 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
31670 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
31671 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
31673 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31674 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31676 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31677 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31679 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
31680 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
31681 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
31682 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
31684 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
31685 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
31687 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31688 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31690 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31691 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31692 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31693 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31695 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31696 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31697 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31698 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
31699 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
31700 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
31702 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31703 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31704 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31705 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31706 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31707 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31708 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31709 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31710 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31711 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31712 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31713 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31714 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31715 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31716 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31718 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
31719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
31721 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31722 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31724 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31727 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
31728 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
31729 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
31730 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
31731 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31732 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31733 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31734 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31735 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31736 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31737 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31738 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31739 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31740 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31741 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31742 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31743 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
31744 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31745 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31746 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31747 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31748 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
31749 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
31750 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31751 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31752 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31753 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31754 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31755 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31756 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31757 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31758 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31759 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31760 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31761 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31762 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31763 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31764 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31765 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
31766 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31767 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31768 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31769 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31770 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31771 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31772 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31773 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31774 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31775 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31776 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31777 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31778 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
31779 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31780 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31781 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31782 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31783 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31784 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31785 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31786 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31787 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31788 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31789 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31790 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31791 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31792 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31793 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31794 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31795 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31796 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31797 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31798 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31799 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31800 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31801 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
31802 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31803 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31804 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31805 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31806 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31807 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31808 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31809 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31810 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31811 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31812 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31813 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31814 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31815 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31816 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31817 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31818 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31819 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31820 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31821 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31822 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31823 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31824 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31825 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31826 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31827 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31828 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31829 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31830 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31831 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31832 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31833 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31834 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31835 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31836 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31837 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31838 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31839 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31840 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31841 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31842 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31843 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31844 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31845 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31846 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
31847 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31848 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
31849 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
31850 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31851 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31852 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31853 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31854 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31855 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31856 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31857 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31858 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31859 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
31860 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
31861 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
31862 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
31863 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31864 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31865 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31866 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31867 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31868 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31869 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31870 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31871 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31872 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31874 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31877 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31878 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31879 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31882 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31883 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31886 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
31887 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
31888 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
31889 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
31892 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31893 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31894 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31895 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31896 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31897 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31900 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
31901 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
31902 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
31903 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
31904 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
31905 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
31906 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31907 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31908 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31909 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31910 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31911 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31912 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31913 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
31914 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31915 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
31916 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
31917 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31918 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31919 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31920 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31921 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31922 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31923 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
31924 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31925 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
31926 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31927 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31928 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31929 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31930 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
31931 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
31932 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
31933 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
31934 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
31935 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
31936 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
31937 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
31938 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31939 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31940 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31941 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31942 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31943 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31944 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31945 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31946 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31947 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31948 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31949 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31950 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31951 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31952 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31953 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31954 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
31955 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
31956 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
31957 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
31958 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
31959 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31972 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31973 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31974 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31975 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31976 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31977 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31978 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31979 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31980 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31981 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31982 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31983 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31984 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31985 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31986 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31987 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31988 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
31989 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
31990 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
31991 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
31992 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31993 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31994 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31995 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31996 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31997 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31998 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
31999 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
32000 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
32001 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
32002 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
32003 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
32005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32009 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32010 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32011 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
32055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
32056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
32057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
32058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
32059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
32060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
32061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
32062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32066 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32067 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32068 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32069 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
32083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
32084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
32085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
32086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
32093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
32094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
32095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
32097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
32098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
32099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
32100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32101 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
32103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
32104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
32106 /* Mask arithmetic operations.  */
32107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
32110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
32114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
32119 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32120 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32121 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32122 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
32123 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32124 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32125 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
32128 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT },
32129 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT },
32130 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32131 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32132 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32133 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32134 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32135 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32136 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32137 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32138 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32139 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32140 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32141 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32142 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32143 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32144 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32145 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32146 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32147 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32148 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32149 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32150 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32151 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32152 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32153 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32154 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32155 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32156 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32157 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32158 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32159 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32160 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32161 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32162 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32163 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32164 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32165 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32166 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32167 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32168 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32169 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32170 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32171 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32172 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32173 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32174 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32175 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32176 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32177 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32178 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32179 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32180 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32181 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_QI },
32182 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_QI },
32183 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI },
32184 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI },
32185 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32186 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32187 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32188 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32189 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32190 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32191 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32192 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32193 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32194 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32195 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32196 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32197 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32198 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32199 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32200 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32201 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32202 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32203 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32204 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32205 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32206 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32207 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32208 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32209 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32210 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32211 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32212 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32213 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32214 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32215 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32216 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32217 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_QI },
32218 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_QI },
32219 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_SI },
32220 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_SI },
32221 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32222 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_HI },
32223 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_HI },
32224 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_HI },
32225 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32226 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_QI },
32227 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32228 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_QI },
32229 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32230 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
32231 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32232 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
32233 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32234 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
32235 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32236 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32237 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32238 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_QI },
32239 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_QI },
32240 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI },
32241 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI },
32242 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32243 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32244 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32245 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32246 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32247 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32248 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32249 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32250 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32251 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32252 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32253 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32254 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32255 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32256 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32257 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32258 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32259 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32260 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32261 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32262 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32263 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32264 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32265 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32266 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32267 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32268 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32269 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32270 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32271 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32272 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32273 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32274 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32275 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32276 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32277 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32278 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32279 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32280 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32281 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32282 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32283 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32284 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32285 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32286 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32287 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32288 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32289 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32290 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32291 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32292 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32293 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32294 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32295 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32296 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32297 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32298 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32299 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32300 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32301 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32302 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32303 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32304 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32305 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32306 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32307 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32308 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32309 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32310 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32311 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32312 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32313 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32314 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32315 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32316 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32317 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32318 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32319 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32320 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32321 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32322 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32323 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32324 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32325 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32326 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32327 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32328 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32329 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32330 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32331 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32332 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32333 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32334 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32335 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32336 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32337 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32338 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32339 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32340 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32341 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32342 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32343 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32344 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32345 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32346 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32347 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32348 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32349 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32350 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32351 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32352 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32353 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32354 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32355 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32356 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32357 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32358 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32359 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32360 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32361 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32362 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32363 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32364 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32365 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32366 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32367 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32368 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32369 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32370 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32371 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32372 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32373 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32374 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32375 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32376 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32377 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32378 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32379 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32380 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32381 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32382 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32383 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32384 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask , "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32385 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32386 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32387 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask" , IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32388 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask" , IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32389 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32390 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask" , IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32391 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32392 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32393 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32394 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32395 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32396 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32397 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32398 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32399 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32400 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32401 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32402 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32403 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32404 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32405 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32406 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32407 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32408 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK , UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32409 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32410 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32411 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32412 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32413 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32414 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32415 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32416 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32417 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32418 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32419 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32420 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32421 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32422 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32423 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32424 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32425 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32426 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32427 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32428 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32429 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32430 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32431 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32432 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32433 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32434 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32435 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32436 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32437 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32438 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32439 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32440 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32441 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32442 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32443 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32444 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32445 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32446 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32447 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32448 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32449 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32450 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32451 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32452 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32453 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32454 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32455 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32456 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32457 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32458 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32459 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32460 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32461 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32462 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32463 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32464 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32465 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32466 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32467 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32468 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32469 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32470 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32471 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32472 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32473 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32474 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32475 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32476 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32477 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32478 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32479 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32480 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32481 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32482 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32483 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32484 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32485 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32486 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32487 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32488 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32489 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32490 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32491 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32492 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32493 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32494 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32495 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32496 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32497 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32498 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32499 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32500 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32501 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32502 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32503 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32504 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32505 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32506 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32507 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32508 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32509 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32510 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI },
32511 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI },
32512 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32513 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32514 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_HI },
32515 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_QI },
32516 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_QI },
32517 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_QI },
32518 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32519 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32520 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32521 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32522 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32523 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32524 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32525 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32526 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32527 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32528 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32529 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32530 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32531 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32532 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32533 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32534 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32535 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32536 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32537 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
  /* AVX512DQ + AVX512VL: convert packed signed/unsigned 64-bit integers
     to packed single/double precision (vcvtqq2ps, vcvtuqq2ps, vcvtqq2pd,
     vcvtuqq2pd), 256-bit and 128-bit forms, merge-masked.  */
32538 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32539 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32540 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32541 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32542 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32543 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32544 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32545 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
  /* AVX512VL: two-source variable permutes.  vpermt2var* overwrite the
     index operand, vpermi2var* overwrite the first data operand; _maskz
     entries are the zero-masking forms.  */
32546 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32547 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32548 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32549 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32550 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32551 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32552 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32553 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32554 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32555 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32556 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32557 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32558 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32559 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32560 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32561 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32562 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32563 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32564 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32565 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32566 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32567 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32568 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32569 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
  /* Masked shuffles: vpshufb (BW), vpshufhw/vpshuflw (BW), vpshufd, and
     the FP shufpd/shufps immediate shuffles (VL).  */
32570 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32571 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32572 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32573 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32574 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32575 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32576 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32577 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32578 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32579 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32580 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32581 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
  /* AVX512VL masked rotates of packed quadwords: variable-count
     (prolv/prorv) and immediate-count (prol/pror) forms.  */
32582 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32583 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32584 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32585 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32586 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32587 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32588 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32589 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
  /* AVX512VL masked per-element variable shifts (vpsrav/vpsllv/vpsrlv)
     built on the AVX2 shift patterns.  */
32590 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32591 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32592 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32593 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32594 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32595 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32596 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32597 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32598 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32599 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32600 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32601 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
  /* AVX512BW + AVX512VL masked word shifts.  The *wi builtins take an
     immediate (INT) count, the others a V8HI count vector; both map to
     the same shift insn pattern.  */
32602 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32603 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32604 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32605 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32606 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32607 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32608 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32609 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
  /* AVX512VL masked rotates of packed doublewords (variable and
     immediate counts), mirroring the quadword group above.  */
32610 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32611 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32612 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32613 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32614 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32615 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32616 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32617 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
  /* AVX512DQ vfpclass: classify FP elements against an immediate test
     set, producing a mask; scalar forms (fpclasssd/ss) need only DQ.  */
32618 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_QI },
32619 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_QI },
32620 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
32621 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_QI },
32622 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_QI },
32623 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
  /* Vector -> mask (cvt*2mask) and mask -> vector (cvtmask2*)
     conversions; b/w forms need BW, d/q forms need DQ.  */
32624 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) HI_FTYPE_V16QI },
32625 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) SI_FTYPE_V32QI },
32626 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) QI_FTYPE_V8HI },
32627 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) HI_FTYPE_V16HI },
32628 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) QI_FTYPE_V4SI },
32629 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) QI_FTYPE_V8SI },
32630 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) QI_FTYPE_V2DI },
32631 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) QI_FTYPE_V4DI },
32632 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_HI },
32633 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_SI },
32634 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_QI },
32635 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_HI },
32636 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_QI },
32637 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_QI },
32638 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_QI },
32639 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_QI },
  /* Masked integer compares writing mask registers: vpcmpeq* then
     vpcmpgt*.  Byte/word forms require AVX512BW.  */
32640 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32641 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32642 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32643 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32644 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32645 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32646 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32647 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32648 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32649 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32650 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32651 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32652 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32653 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32654 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32655 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
  /* vptestm* / vptestnm*: AND (resp. ANDN) the operands and set mask
     bits for nonzero elements.  */
32656 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32657 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32658 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32659 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32660 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32661 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32662 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32663 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32664 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32665 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32666 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32667 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32668 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32669 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32670 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32671 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
  /* AVX512CD + AVX512VL: broadcast the low bits of a mask register to
     every element of a vector (vpbroadcastmb2q / vpbroadcastmw2d).  */
32672 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_QI },
32673 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_QI },
32674 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_HI },
32675 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_QI },
  /* AVX512VL masked compress/expand.  Merge-masking entries first, then
     the zero-masking (_maskz) expand variants.  */
32676 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32677 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32678 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32679 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32680 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32681 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32682 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32683 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32684 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32685 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32686 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32687 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32688 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32689 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32690 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32691 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32692 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32693 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32694 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32695 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32696 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32697 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32698 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32699 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
  /* Masked signed/unsigned element-wise min/max; byte/word forms need
     AVX512BW, dword/qword forms only AVX512VL.  */
32700 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32701 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32702 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32703 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32704 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32705 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32706 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32707 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32708 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32709 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32710 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32711 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32712 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32713 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32714 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32715 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32716 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32717 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32718 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32719 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32720 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32721 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32722 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32723 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32724 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32725 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32726 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32727 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32728 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32729 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32730 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32731 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
  /* AVX512CD + AVX512VL: vpconflict (duplicate detection) and vplzcnt
     (leading-zero count), 256-bit masked forms.  */
32732 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32733 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32734 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32735 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
  /* AVX512VL masked unpacks (the table continues past this chunk).  */
32736 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32737 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32738 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32739 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32740 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32741 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32742 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32743 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32744 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32745 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32746 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32747 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32748 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32749 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32750 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI },
32751 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI },
32752 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_QI },
32753 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_QI },
32754 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_QI },
32755 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_QI },
32756 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32757 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32758 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32759 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32760 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32761 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32762 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32763 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32764 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32765 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32766 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32767 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32768 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32769 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32770 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32771 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32772 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32773 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32774 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32775 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32776 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32777 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32778 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32779 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32780 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32781 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32782 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32783 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32784 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32785 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32786 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32787 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32788 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32789 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32790 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32791 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32792 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32793 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32794 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32795 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32796 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32797 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32798 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32799 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32800 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32801 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32802 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32803 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32804 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32805 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32806 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32807 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32808 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32809 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32810 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32811 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32812 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32813 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32814 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32815 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_QI },
32816 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_QI },
32817 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32818 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32819 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32820 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32821 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32822 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32823 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32824 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32825 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32826 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32827 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32828 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_QI },
32829 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_QI },
32830 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32831 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32832 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32833 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32834 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32835 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32836 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32837 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32838 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI },
32839 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI },
32842 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
32843 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
32844 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
32845 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
32846 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_HI },
32847 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_HI },
32848 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_QI },
32849 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_QI },
32850 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_QI },
32851 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_QI },
32852 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32853 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32854 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32855 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32856 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32857 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32858 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32859 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32860 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32861 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI},
32862 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32863 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI },
32864 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI },
32865 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI },
32866 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI },
32867 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_QI },
32868 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_HI },
32869 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) HI_FTYPE_V16SI },
32870 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) QI_FTYPE_V8DI },
32871 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_HI },
32872 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_QI },
32875 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) SI_FTYPE_SI_SI },
32876 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) DI_FTYPE_DI_DI },
32877 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32878 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32879 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32880 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32881 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
32882 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT },
32883 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32884 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32885 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
32886 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI },
32887 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_DI },
32888 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_DI },
32889 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_SI },
32890 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_SI },
32891 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32892 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32893 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32894 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32895 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32896 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32897 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32898 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32899 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32900 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32901 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32902 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32903 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32904 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32905 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32906 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32907 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32908 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32909 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32910 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32911 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32912 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32913 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32914 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32915 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32916 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32917 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32918 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32919 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32920 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32921 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32922 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32923 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask" , IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32924 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask" , IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32925 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32926 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32927 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32928 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32929 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32930 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32931 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_SI },
32932 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_HI },
32933 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32934 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32935 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32936 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32937 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32938 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32939 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32940 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32941 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32942 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32943 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32944 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32945 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) DI_FTYPE_V64QI },
32946 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) SI_FTYPE_V32HI },
32947 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_DI },
32948 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_SI },
32949 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32950 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32951 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32952 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32953 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32954 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32955 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32956 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32957 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32958 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32959 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32960 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32961 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32962 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32963 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32964 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32965 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32968 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32969 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32970 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32971 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32972 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32973 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32974 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32975 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32976 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32977 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32978 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32979 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32982 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32983 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32984 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32985 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32986 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32987 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32988 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32989 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32990 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32991 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32992   { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32993 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32994   { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32995 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32996 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32999 /* Builtins with rounding support. */
33000 static const struct builtin_description bdesc_round_args[] =
33003 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
33008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
33009 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
33010 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
33011 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
33012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
33013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
33014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
33016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
33018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
33020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
33022 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
33023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
33024 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
33025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
33026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
33031 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
33032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
33033 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
33034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
33039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
33040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
33041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
33042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
33043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
33044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
33045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
33046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
33051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
33052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
33067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
33068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33083 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33085 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33087 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33089 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33091 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33093 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33095 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33097 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
33105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
33106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33124 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33125 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33126 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33127 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33128 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33129 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33130 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33131 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33132 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33133 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33136 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33137 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33138 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33139 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33140 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33141 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33142 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33143 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33144 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33145 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33146 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33147 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33148 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33149 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33150 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
33151 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
/* Builtins for MPX.  */
33155 static const struct builtin_description bdesc_mpx[] =
33157 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
33158 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33159 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33162 /* Const builtins for MPX. */
33163 static const struct builtin_description bdesc_mpx_const[] =
33165 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
33166 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
33167 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
33168 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
33169 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
33170 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
33171 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
33172 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
/* FMA4 and XOP.  */

/* Shorthand aliases for the function-type enumerators used by the
   FMA4/XOP entries in bdesc_multi_arg.  Naming scheme:
   MULTI_ARG_<nargs>_<element kind>[2][_<suffix>], where a trailing "2"
   on the element kind selects the 256-bit vector types (e.g. _3_SF is
   V4SF-based, _3_SF2 is V8SF-based), and the suffixes are:
     _IMM  - last operand is an immediate (SI);
     _CMP  - type carries a comparison code (_CMP variants);
     _TF   - type carries a trailing TF operand;
   mixed-width forms such as _3_SI_DI name both widths involved.  */

/* Four-operand forms: two vectors, a selector vector, and an
   immediate.  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT

/* Three-operand forms, same element type throughout.  */
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI

/* Two-operand forms.  */
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI

/* Vector with an immediate second operand.  */
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI

/* Two-operand comparison forms (used with a comparison code in the
   builtin table, e.g. the vpcom* entries).  */
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP

/* Two-operand TF forms.  */
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF

/* One-operand forms.  */
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI

/* One-operand widening forms: result element is wider than the source
   element (used by the vphadd*/vphsub* horizontal builtins).  */
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
33229 static const struct builtin_description bdesc_multi_arg[] =
33231 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
33232 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
33233 UNKNOWN, (int)MULTI_ARG_3_SF },
33234 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
33235 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
33236 UNKNOWN, (int)MULTI_ARG_3_DF },
33238 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
33239 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
33240 UNKNOWN, (int)MULTI_ARG_3_SF },
33241 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
33242 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
33243 UNKNOWN, (int)MULTI_ARG_3_DF },
33245 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
33246 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
33247 UNKNOWN, (int)MULTI_ARG_3_SF },
33248 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
33249 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
33250 UNKNOWN, (int)MULTI_ARG_3_DF },
33251 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
33252 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
33253 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33254 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
33255 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
33256 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33258 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
33259 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
33260 UNKNOWN, (int)MULTI_ARG_3_SF },
33261 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
33262 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
33263 UNKNOWN, (int)MULTI_ARG_3_DF },
33264 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
33265 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
33266 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33267 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
33268 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
33269 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33271 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
33272 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
33273 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
33274 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
33275 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
33276 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
33277 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
33279 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33280 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33281 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
33282 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
33283 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
33284 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
33285 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
33287 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
33289 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33290 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33291 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33292 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33293 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33294 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33295 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33296 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33297 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33298 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33299 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33300 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33302 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33303 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
33304 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
33305 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
33306 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
33307 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
33308 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
33309 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
33310 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33311 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
33312 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
33313 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
33314 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33315 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
33316 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
33317 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
33319 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
33320 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
33321 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
33322 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
33323 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
33324 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
33326 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33327 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33328 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33329 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33330 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33331 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33332 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33333 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33334 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33335 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33336 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33337 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33338 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33339 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33340 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33342 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
33343 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33344 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33345 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
33346 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
33347 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
33348 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
33350 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
33351 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33352 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33353 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
33354 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
33355 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
33356 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
33358 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
33359 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33360 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33361 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
33362 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
33363 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
33364 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
33366 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33367 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33368 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33369 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
33370 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
33371 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
33372 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
33374 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
33375 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33376 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33377 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
33378 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
33379 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
33380 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
33382 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
33383 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33384 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33385 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
33386 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
33387 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
33388 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
33390 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
33391 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33392 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33393 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
33394 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
33395 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
33396 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
33398 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33399 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33400 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33401 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
33402 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
33403 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
33404 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
33406 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33407 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33408 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33409 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33410 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33411 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33412 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33413 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33415 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33416 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33417 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33418 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33419 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33420 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33421 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33422 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33424 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
33425 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
33426 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
33427 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
/* TM vector builtins.  */

/* Reuse the existing x86-specific `struct builtin_description' because
   we're lazy.  Add casts to make them fit.  */
33435 static const struct builtin_description bdesc_tm[] =
33437 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33438 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33439 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33440 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33441 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33442 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33443 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33445 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33446 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33447 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33448 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33449 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33450 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33451 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33453 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33454 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33455 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33456 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33457 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33458 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33459 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33461 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
33462 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
33463 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
33466 /* TM callbacks. */
33468 /* Return the builtin decl needed to load a vector of TYPE. */
33471 ix86_builtin_tm_load (tree type)
33473 if (TREE_CODE (type) == VECTOR_TYPE)
33475 switch (tree_to_uhwi (TYPE_SIZE (type)))
33478 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
33480 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
33482 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
33488 /* Return the builtin decl needed to store a vector of TYPE. */
33491 ix86_builtin_tm_store (tree type)
33493 if (TREE_CODE (type) == VECTOR_TYPE)
33495 switch (tree_to_uhwi (TYPE_SIZE (type)))
33498 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
33500 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
33502 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
33508 /* Initialize the transactional memory vector load/store builtins. */
33511 ix86_init_tm_builtins (void)
33513 enum ix86_builtin_func_type ftype;
33514 const struct builtin_description *d;
33517 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
33518 tree attrs_log, attrs_type_log;
33523 /* If there are no builtins defined, we must be compiling in a
33524 language without trans-mem support. */
33525 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
33528 /* Use whatever attributes a normal TM load has. */
33529 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
33530 attrs_load = DECL_ATTRIBUTES (decl);
33531 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33532 /* Use whatever attributes a normal TM store has. */
33533 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
33534 attrs_store = DECL_ATTRIBUTES (decl);
33535 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33536 /* Use whatever attributes a normal TM log has. */
33537 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
33538 attrs_log = DECL_ATTRIBUTES (decl);
33539 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33541 for (i = 0, d = bdesc_tm;
33542 i < ARRAY_SIZE (bdesc_tm);
33545 if ((d->mask & ix86_isa_flags) != 0
33546 || (lang_hooks.builtin_function
33547 == lang_hooks.builtin_function_ext_scope))
33549 tree type, attrs, attrs_type;
33550 enum built_in_function code = (enum built_in_function) d->code;
33552 ftype = (enum ix86_builtin_func_type) d->flag;
33553 type = ix86_get_builtin_func_type (ftype);
33555 if (BUILTIN_TM_LOAD_P (code))
33557 attrs = attrs_load;
33558 attrs_type = attrs_type_load;
33560 else if (BUILTIN_TM_STORE_P (code))
33562 attrs = attrs_store;
33563 attrs_type = attrs_type_store;
33568 attrs_type = attrs_type_log;
33570 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
33571 /* The builtin without the prefix for
33572 calling it directly. */
33573 d->name + strlen ("__builtin_"),
33575 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
33576 set the TYPE_ATTRIBUTES. */
33577 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
33579 set_builtin_decl (code, decl, false);
33584 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
33585 in the current target ISA to allow the user to compile particular modules
33586 with different target specific options that differ from the command line
33589 ix86_init_mmx_sse_builtins (void)
33591 const struct builtin_description * d;
33592 enum ix86_builtin_func_type ftype;
33595 /* Add all special builtins with variable number of operands. */
33596 for (i = 0, d = bdesc_special_args;
33597 i < ARRAY_SIZE (bdesc_special_args);
33603 ftype = (enum ix86_builtin_func_type) d->flag;
33604 def_builtin (d->mask, d->name, ftype, d->code);
33607 /* Add all builtins with variable number of operands. */
33608 for (i = 0, d = bdesc_args;
33609 i < ARRAY_SIZE (bdesc_args);
33615 ftype = (enum ix86_builtin_func_type) d->flag;
33616 def_builtin_const (d->mask, d->name, ftype, d->code);
33619 /* Add all builtins with rounding. */
33620 for (i = 0, d = bdesc_round_args;
33621 i < ARRAY_SIZE (bdesc_round_args);
33627 ftype = (enum ix86_builtin_func_type) d->flag;
33628 def_builtin_const (d->mask, d->name, ftype, d->code);
33631 /* pcmpestr[im] insns. */
33632 for (i = 0, d = bdesc_pcmpestr;
33633 i < ARRAY_SIZE (bdesc_pcmpestr);
33636 if (d->code == IX86_BUILTIN_PCMPESTRM128)
33637 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
33639 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
33640 def_builtin_const (d->mask, d->name, ftype, d->code);
33643 /* pcmpistr[im] insns. */
33644 for (i = 0, d = bdesc_pcmpistr;
33645 i < ARRAY_SIZE (bdesc_pcmpistr);
33648 if (d->code == IX86_BUILTIN_PCMPISTRM128)
33649 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
33651 ftype = INT_FTYPE_V16QI_V16QI_INT;
33652 def_builtin_const (d->mask, d->name, ftype, d->code);
33655 /* comi/ucomi insns. */
33656 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
33658 if (d->mask == OPTION_MASK_ISA_SSE2)
33659 ftype = INT_FTYPE_V2DF_V2DF;
33661 ftype = INT_FTYPE_V4SF_V4SF;
33662 def_builtin_const (d->mask, d->name, ftype, d->code);
33666 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
33667 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
33668 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
33669 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
33671 /* SSE or 3DNow!A */
33672 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33673 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
33674 IX86_BUILTIN_MASKMOVQ);
33677 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
33678 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
33680 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
33681 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
33682 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
33683 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
33686 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
33687 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
33688 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
33689 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
33692 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
33693 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
33694 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
33695 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
33696 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
33697 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
33698 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
33699 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
33700 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
33701 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
33702 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
33703 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
33706 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
33707 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
33710 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
33711 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
33712 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
33713 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
33714 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
33715 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
33716 IX86_BUILTIN_RDRAND64_STEP);
33719 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
33720 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
33721 IX86_BUILTIN_GATHERSIV2DF);
33723 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
33724 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
33725 IX86_BUILTIN_GATHERSIV4DF);
33727 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
33728 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
33729 IX86_BUILTIN_GATHERDIV2DF);
33731 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
33732 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
33733 IX86_BUILTIN_GATHERDIV4DF);
33735 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
33736 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
33737 IX86_BUILTIN_GATHERSIV4SF);
33739 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
33740 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
33741 IX86_BUILTIN_GATHERSIV8SF);
33743 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
33744 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
33745 IX86_BUILTIN_GATHERDIV4SF);
33747 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
33748 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
33749 IX86_BUILTIN_GATHERDIV8SF);
33751 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
33752 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
33753 IX86_BUILTIN_GATHERSIV2DI);
33755 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
33756 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
33757 IX86_BUILTIN_GATHERSIV4DI);
33759 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
33760 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
33761 IX86_BUILTIN_GATHERDIV2DI);
33763 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
33764 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
33765 IX86_BUILTIN_GATHERDIV4DI);
33767 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
33768 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
33769 IX86_BUILTIN_GATHERSIV4SI);
33771 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
33772 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
33773 IX86_BUILTIN_GATHERSIV8SI);
33775 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
33776 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
33777 IX86_BUILTIN_GATHERDIV4SI);
33779 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
33780 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
33781 IX86_BUILTIN_GATHERDIV8SI);
33783 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
33784 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
33785 IX86_BUILTIN_GATHERALTSIV4DF);
33787 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
33788 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
33789 IX86_BUILTIN_GATHERALTDIV8SF);
33791 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
33792 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
33793 IX86_BUILTIN_GATHERALTSIV4DI);
33795 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
33796 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
33797 IX86_BUILTIN_GATHERALTDIV8SI);
33800 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
33801 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
33802 IX86_BUILTIN_GATHER3SIV16SF);
33804 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
33805 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
33806 IX86_BUILTIN_GATHER3SIV8DF);
33808 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
33809 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
33810 IX86_BUILTIN_GATHER3DIV16SF);
33812 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
33813 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
33814 IX86_BUILTIN_GATHER3DIV8DF);
33816 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
33817 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
33818 IX86_BUILTIN_GATHER3SIV16SI);
33820 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
33821 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
33822 IX86_BUILTIN_GATHER3SIV8DI);
33824 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
33825 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
33826 IX86_BUILTIN_GATHER3DIV16SI);
33828 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
33829 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
33830 IX86_BUILTIN_GATHER3DIV8DI);
33832 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
33833 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
33834 IX86_BUILTIN_GATHER3ALTSIV8DF);
33836 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
33837 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
33838 IX86_BUILTIN_GATHER3ALTDIV16SF);
33840 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
33841 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
33842 IX86_BUILTIN_GATHER3ALTSIV8DI);
33844 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
33845 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
33846 IX86_BUILTIN_GATHER3ALTDIV16SI);
33848 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
33849 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
33850 IX86_BUILTIN_SCATTERSIV16SF);
33852 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
33853 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
33854 IX86_BUILTIN_SCATTERSIV8DF);
33856 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
33857 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
33858 IX86_BUILTIN_SCATTERDIV16SF);
33860 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
33861 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
33862 IX86_BUILTIN_SCATTERDIV8DF);
33864 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
33865 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
33866 IX86_BUILTIN_SCATTERSIV16SI);
33868 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
33869 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
33870 IX86_BUILTIN_SCATTERSIV8DI);
33872 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
33873 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
33874 IX86_BUILTIN_SCATTERDIV16SI);
33876 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
33877 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
33878 IX86_BUILTIN_SCATTERDIV8DI);
33881 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
33882 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
33883 IX86_BUILTIN_GATHER3SIV2DF);
33885 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
33886 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
33887 IX86_BUILTIN_GATHER3SIV4DF);
33889 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
33890 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
33891 IX86_BUILTIN_GATHER3DIV2DF);
33893 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
33894 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
33895 IX86_BUILTIN_GATHER3DIV4DF);
33897 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
33898 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
33899 IX86_BUILTIN_GATHER3SIV4SF);
33901 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
33902 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
33903 IX86_BUILTIN_GATHER3SIV8SF);
33905 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
33906 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
33907 IX86_BUILTIN_GATHER3DIV4SF);
33909 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
33910 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
33911 IX86_BUILTIN_GATHER3DIV8SF);
33913 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
33914 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
33915 IX86_BUILTIN_GATHER3SIV2DI);
33917 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
33918 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
33919 IX86_BUILTIN_GATHER3SIV4DI);
33921 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
33922 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
33923 IX86_BUILTIN_GATHER3DIV2DI);
33925 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
33926 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
33927 IX86_BUILTIN_GATHER3DIV4DI);
33929 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
33930 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
33931 IX86_BUILTIN_GATHER3SIV4SI);
33933 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
33934 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
33935 IX86_BUILTIN_GATHER3SIV8SI);
33937 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
33938 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
33939 IX86_BUILTIN_GATHER3DIV4SI);
33941 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
33942 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
33943 IX86_BUILTIN_GATHER3DIV8SI);
33945 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
33946 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
33947 IX86_BUILTIN_GATHER3ALTSIV4DF);
33949 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
33950 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
33951 IX86_BUILTIN_GATHER3ALTDIV8SF);
33953 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
33954 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
33955 IX86_BUILTIN_GATHER3ALTSIV4DI);
33957 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
33958 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
33959 IX86_BUILTIN_GATHER3ALTDIV8SI);
33961 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
33962 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
33963 IX86_BUILTIN_SCATTERSIV8SF);
33965 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
33966 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
33967 IX86_BUILTIN_SCATTERSIV4SF);
33969 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
33970 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
33971 IX86_BUILTIN_SCATTERSIV4DF);
33973 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
33974 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
33975 IX86_BUILTIN_SCATTERSIV2DF);
33977 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
33978 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
33979 IX86_BUILTIN_SCATTERDIV8SF);
33981 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
33982 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
33983 IX86_BUILTIN_SCATTERDIV4SF);
33985 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
33986 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
33987 IX86_BUILTIN_SCATTERDIV4DF);
33989 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
33990 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
33991 IX86_BUILTIN_SCATTERDIV2DF);
33993 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
33994 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
33995 IX86_BUILTIN_SCATTERSIV8SI);
33997 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
33998 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
33999 IX86_BUILTIN_SCATTERSIV4SI);
34001 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
34002 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
34003 IX86_BUILTIN_SCATTERSIV4DI);
34005 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
34006 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
34007 IX86_BUILTIN_SCATTERSIV2DI);
34009 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
34010 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
34011 IX86_BUILTIN_SCATTERDIV8SI);
34013 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
34014 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
34015 IX86_BUILTIN_SCATTERDIV4SI);
34017 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
34018 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
34019 IX86_BUILTIN_SCATTERDIV4DI);
34021 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
34022 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
34023 IX86_BUILTIN_SCATTERDIV2DI);
34026 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
34027 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
34028 IX86_BUILTIN_GATHERPFDPD);
34029 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
34030 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
34031 IX86_BUILTIN_GATHERPFDPS);
34032 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
34033 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
34034 IX86_BUILTIN_GATHERPFQPD);
34035 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
34036 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
34037 IX86_BUILTIN_GATHERPFQPS);
34038 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
34039 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
34040 IX86_BUILTIN_SCATTERPFDPD);
34041 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
34042 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
34043 IX86_BUILTIN_SCATTERPFDPS);
34044 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
34045 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
34046 IX86_BUILTIN_SCATTERPFQPD);
34047 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
34048 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
34049 IX86_BUILTIN_SCATTERPFQPS);
34052 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
34053 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
34054 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
34055 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
34056 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
34057 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
34058 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
34059 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
34060 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
34061 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
34062 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
34063 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
34064 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
34065 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
34068 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
34069 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
34071 /* MMX access to the vec_init patterns. */
34072 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
34073 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
34075 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
34076 V4HI_FTYPE_HI_HI_HI_HI,
34077 IX86_BUILTIN_VEC_INIT_V4HI);
34079 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
34080 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
34081 IX86_BUILTIN_VEC_INIT_V8QI);
34083 /* Access to the vec_extract patterns. */
34084 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
34085 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
34086 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
34087 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
34088 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
34089 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
34090 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
34091 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
34092 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
34093 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
34095 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34096 "__builtin_ia32_vec_ext_v4hi",
34097 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
34099 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
34100 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
34102 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
34103 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
34105 /* Access to the vec_set patterns. */
34106 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
34107 "__builtin_ia32_vec_set_v2di",
34108 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
34110 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
34111 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
34113 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
34114 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
34116 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
34117 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
34119 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34120 "__builtin_ia32_vec_set_v4hi",
34121 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
34123 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
34124 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
34127 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
34128 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
34129 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
34130 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
34131 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
34132 "__builtin_ia32_rdseed_di_step",
34133 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
34136 def_builtin (0, "__builtin_ia32_addcarryx_u32",
34137 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
34138 def_builtin (OPTION_MASK_ISA_64BIT,
34139 "__builtin_ia32_addcarryx_u64",
34140 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34141 IX86_BUILTIN_ADDCARRYX64);
34144 def_builtin (0, "__builtin_ia32_sbb_u32",
34145 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
34146 def_builtin (OPTION_MASK_ISA_64BIT,
34147 "__builtin_ia32_sbb_u64",
34148 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34149 IX86_BUILTIN_SBB64);
34151 /* Read/write FLAGS. */
34152 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
34153 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34154 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
34155 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34156 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
34157 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
34158 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
34159 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
34162 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
34163 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
34166 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
34167 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
34169 /* Add FMA4 multi-arg argument instructions */
34170 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
34175 ftype = (enum ix86_builtin_func_type) d->flag;
34176 def_builtin_const (d->mask, d->name, ftype, d->code);
/* Register the MPX (Memory Protection Extensions) builtins from the
   bdesc_mpx (side-effecting) and bdesc_mpx_const (const) tables.
   NOTE(review): this listing elides several structural lines (braces,
   loop increments); comments below annotate only the visible code.  */
34181 ix86_init_mpx_builtins ()
34183   const struct builtin_description * d;
34184   enum ix86_builtin_func_type ftype;
/* First table: MPX builtins that may have side effects.  */
34188   for (i = 0, d = bdesc_mpx;
34189        i < ARRAY_SIZE (bdesc_mpx);
34195       ftype = (enum ix86_builtin_func_type) d->flag;
34196       decl = def_builtin (d->mask, d->name, ftype, d->code);
34198       /* With no leaf and nothrow flags for MPX builtins
34199 	 abnormal edges may follow its call when setjmp
34200 	 presents in the function.  Since we may have a lot
34201 	 of MPX builtins calls it causes lots of useless
34202 	 edges and enormous PHI nodes.  To avoid this we mark
34203 	 MPX builtins as leaf and nothrow.  */
/* The isa table records leaf_p/nothrow_p so the attributes can be
   reapplied if the decl is created later (deferred ISA builtins).  */
34206 	  DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34208 	  TREE_NOTHROW (decl) = 1;
34212 	  ix86_builtins_isa[(int)d->code].leaf_p = true;
34213 	  ix86_builtins_isa[(int)d->code].nothrow_p = true;
/* Second table: const MPX builtins; same leaf/nothrow marking.  */
34217   for (i = 0, d = bdesc_mpx_const;
34218        i < ARRAY_SIZE (bdesc_mpx_const);
34224       ftype = (enum ix86_builtin_func_type) d->flag;
34225       decl = def_builtin_const (d->mask, d->name, ftype, d->code);
34229 	  DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34231 	  TREE_NOTHROW (decl) = 1;
34235 	  ix86_builtins_isa[(int)d->code].leaf_p = true;
34236 	  ix86_builtins_isa[(int)d->code].nothrow_p = true;
34241 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
34242 to return a pointer to VERSION_DECL if the outcome of the expression
34243 formed by PREDICATE_CHAIN is true. This function will be called during
34244 version dispatch to decide which function version to execute. It returns
34245 the basic block at the end, to which more conditions can be added. */
34248 add_condition_to_bb (tree function_decl, tree version_decl,
34249 tree predicate_chain, basic_block new_bb)
34251 gimple return_stmt;
34252 tree convert_expr, result_var;
34253 gimple convert_stmt;
34254 gimple call_cond_stmt;
34255 gimple if_else_stmt;
34257 basic_block bb1, bb2, bb3;
34260 tree cond_var, and_expr_var = NULL_TREE;
34263 tree predicate_decl, predicate_arg;
/* Switch cfun to the dispatcher body we are building into.
   NOTE(review): the matching pop_cfun is elided in this listing.  */
34265 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
34267 gcc_assert (new_bb != NULL);
34268 gseq = bb_seq (new_bb);
/* Build "return (void *) &version_decl;" as two statements:
   a conversion into a temporary, then the return.  */
34271 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
34272 build_fold_addr_expr (version_decl));
34273 result_var = create_tmp_var (ptr_type_node);
34274 convert_stmt = gimple_build_assign (result_var, convert_expr);
34275 return_stmt = gimple_build_return (result_var);
/* No predicates: this is the unconditional (default) case — emit the
   return straight into NEW_BB.  */
34277 if (predicate_chain == NULL_TREE)
34279 gimple_seq_add_stmt (&gseq, convert_stmt);
34280 gimple_seq_add_stmt (&gseq, return_stmt);
34281 set_bb_seq (new_bb, gseq);
34282 gimple_set_bb (convert_stmt, new_bb);
34283 gimple_set_bb (return_stmt, new_bb);
/* Emit one predicate call per chain element and AND the integer
   results together.  TREE_PURPOSE is the predicate builtin decl,
   TREE_VALUE its argument (a feature-name string).  */
34288 while (predicate_chain != NULL)
34290 cond_var = create_tmp_var (integer_type_node);
34291 predicate_decl = TREE_PURPOSE (predicate_chain);
34292 predicate_arg = TREE_VALUE (predicate_chain);
34293 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
34294 gimple_call_set_lhs (call_cond_stmt, cond_var);
34296 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
34297 gimple_set_bb (call_cond_stmt, new_bb);
34298 gimple_seq_add_stmt (&gseq, call_cond_stmt);
34300 predicate_chain = TREE_CHAIN (predicate_chain);
34302 if (and_expr_var == NULL)
34303 and_expr_var = cond_var;
34306 gimple assign_stmt;
34307 /* Use MIN_EXPR to check if any integer is zero?.
34308 and_expr_var = min_expr <cond_var, and_expr_var> */
34309 assign_stmt = gimple_build_assign (and_expr_var,
34310 build2 (MIN_EXPR, integer_type_node,
34311 cond_var, and_expr_var));
34313 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
34314 gimple_set_bb (assign_stmt, new_bb);
34315 gimple_seq_add_stmt (&gseq, assign_stmt);
/* Branch on and_expr_var > 0; the true path returns VERSION_DECL.  */
34319 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
34321 NULL_TREE, NULL_TREE);
34322 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
34323 gimple_set_bb (if_else_stmt, new_bb);
34324 gimple_seq_add_stmt (&gseq, if_else_stmt);
34326 gimple_seq_add_stmt (&gseq, convert_stmt);
34327 gimple_seq_add_stmt (&gseq, return_stmt);
34328 set_bb_seq (new_bb, gseq);
/* CFG surgery: split after the condition (bb1 -> bb2 holds the
   return) and after the return (bb2 -> bb3 receives the next
   condition).  The fallthru edge becomes the TRUE edge; FALSE goes
   from bb1 directly to bb3.  */
34331 e12 = split_block (bb1, if_else_stmt);
34333 e12->flags &= ~EDGE_FALLTHRU;
34334 e12->flags |= EDGE_TRUE_VALUE;
34336 e23 = split_block (bb2, return_stmt);
34338 gimple_set_bb (convert_stmt, bb2);
34339 gimple_set_bb (return_stmt, bb2);
34342 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
/* bb2 returns, so it feeds the function exit block.  */
34345 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
34352 /* This parses the attribute arguments to target in DECL and determines
34353 the right builtin to use to match the platform specification.
34354 It returns the priority value for this version decl. If PREDICATE_LIST
34355 is not NULL, it stores the list of cpu features that need to be checked
34356 before dispatching this function. */
34358 static unsigned int
34359 get_builtin_code_for_version (tree decl, tree *predicate_list)
34362 struct cl_target_option cur_target;
34364 struct cl_target_option *new_target;
34365 const char *arg_str = NULL;
34366 const char *attrs_str = NULL;
34367 char *tok_str = NULL;
34370 /* Priority of i386 features, greater value is higher priority. This is
34371 used to decide the order in which function dispatch must happen. For
34372 instance, a version specialized for SSE4.2 should be checked for dispatch
34373 before a version for SSE3, as SSE4.2 implies SSE3. */
/* NOTE(review): the enumerator list (P_ZERO .. P_PROC_AVX512F) is
   elided in this listing.  */
34374 enum feature_priority
34405 enum feature_priority priority = P_ZERO;
34407 /* These are the target attribute strings for which a dispatcher is
34408 available, from fold_builtin_cpu. */
34410 static struct _feature_list
34412 const char *const name;
34413 const enum feature_priority priority;
/* NOTE(review): only part of this table is visible; entries below
   "popcnt" up to "avx512f" are elided.  */
34415 const feature_list[] =
34421 {"sse4a", P_SSE4_A},
34422 {"ssse3", P_SSSE3},
34423 {"sse4.1", P_SSE4_1},
34424 {"sse4.2", P_SSE4_2},
34425 {"popcnt", P_POPCNT},
34433 {"avx512f", P_AVX512F}
34437 static unsigned int NUM_FEATURES
34438 = sizeof (feature_list) / sizeof (struct _feature_list);
34442 tree predicate_chain = NULL_TREE;
34443 tree predicate_decl, predicate_arg;
34445 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34446 gcc_assert (attrs != NULL);
34448 attrs = TREE_VALUE (TREE_VALUE (attrs));
34450 gcc_assert (TREE_CODE (attrs) == STRING_CST);
34451 attrs_str = TREE_STRING_POINTER (attrs);
34453 /* Return priority zero for default function. */
34454 if (strcmp (attrs_str, "default") == 0)
34457 /* Handle arch= if specified. For priority, set it to be 1 more than
34458 the best instruction set the processor can handle. For instance, if
34459 there is a version for atom and a version for ssse3 (the highest ISA
34460 priority for atom), the atom version must be checked for dispatch
34461 before the ssse3 version. */
34462 if (strstr (attrs_str, "arch=") != NULL)
/* Parse the attribute with the real option machinery; global_options
   is saved here and restored after the switch below.  */
34464 cl_target_option_save (&cur_target, &global_options);
34465 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
34466 &global_options_set);
34468 gcc_assert (target_node);
34469 new_target = TREE_TARGET_OPTION (target_node);
34470 gcc_assert (new_target);
/* Map the processor enum to the string __builtin_cpu_is understands,
   plus a dispatch priority.  NOTE(review): the break statements and
   some arg_str assignments are elided in this listing.  */
34472 if (new_target->arch_specified && new_target->arch > 0)
34474 switch (new_target->arch)
34476 case PROCESSOR_CORE2:
34478 priority = P_PROC_SSSE3;
34480 case PROCESSOR_NEHALEM:
34481 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
34482 arg_str = "westmere";
34484 /* We translate "arch=corei7" and "arch=nehalem" to
34485 "corei7" so that it will be mapped to M_INTEL_COREI7
34486 as cpu type to cover all M_INTEL_COREI7_XXXs. */
34487 arg_str = "corei7";
34488 priority = P_PROC_SSE4_2;
34490 case PROCESSOR_SANDYBRIDGE:
34491 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
34492 arg_str = "ivybridge";
34494 arg_str = "sandybridge";
34495 priority = P_PROC_AVX;
34497 case PROCESSOR_HASWELL:
34498 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
34499 arg_str = "broadwell";
34501 arg_str = "haswell";
34502 priority = P_PROC_AVX2;
34504 case PROCESSOR_BONNELL:
34505 arg_str = "bonnell";
34506 priority = P_PROC_SSSE3;
34508 case PROCESSOR_KNL:
34510 priority = P_PROC_AVX512F;
34512 case PROCESSOR_SILVERMONT:
34513 arg_str = "silvermont";
34514 priority = P_PROC_SSE4_2;
34516 case PROCESSOR_AMDFAM10:
34517 arg_str = "amdfam10h";
34518 priority = P_PROC_SSE4_A;
34520 case PROCESSOR_BTVER1:
34521 arg_str = "btver1";
34522 priority = P_PROC_SSE4_A;
34524 case PROCESSOR_BTVER2:
34525 arg_str = "btver2";
34526 priority = P_PROC_BMI;
34528 case PROCESSOR_BDVER1:
34529 arg_str = "bdver1";
34530 priority = P_PROC_XOP;
34532 case PROCESSOR_BDVER2:
34533 arg_str = "bdver2";
34534 priority = P_PROC_FMA;
34536 case PROCESSOR_BDVER3:
34537 arg_str = "bdver3";
34538 priority = P_PROC_FMA;
34540 case PROCESSOR_BDVER4:
34541 arg_str = "bdver4";
34542 priority = P_PROC_AVX2;
34547 cl_target_option_restore (&global_options, &cur_target);
34549 if (predicate_list && arg_str == NULL)
34551 error_at (DECL_SOURCE_LOCATION (decl),
34552 "No dispatcher found for the versioning attributes");
/* Chain a __builtin_cpu_is ("<arch>") predicate for the arch.  */
34556 if (predicate_list)
34558 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
34559 /* For a C string literal the length includes the trailing NULL. */
34560 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
34561 predicate_chain = tree_cons (predicate_decl, predicate_arg,
/* Tokenize the attribute on ',' and chain a __builtin_cpu_supports
   predicate per recognized feature name.  NOTE(review): strtok
   mutates tok_str, a private copy of attrs_str; the xmalloc'd copy is
   freed on an elided line.  */
34566 /* Process feature name. */
34567 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
34568 strcpy (tok_str, attrs_str);
34569 token = strtok (tok_str, ",");
34570 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
34572 while (token != NULL)
34574 /* Do not process "arch=" */
34575 if (strncmp (token, "arch=", 5) == 0)
34577 token = strtok (NULL, ",");
34580 for (i = 0; i < NUM_FEATURES; ++i)
34582 if (strcmp (token, feature_list[i].name) == 0)
34584 if (predicate_list)
34586 predicate_arg = build_string_literal (
34587 strlen (feature_list[i].name) + 1,
34588 feature_list[i].name);
34589 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34592 /* Find the maximum priority feature. */
34593 if (feature_list[i].priority > priority)
34594 priority = feature_list[i].priority;
/* i == NUM_FEATURES means the loop above matched nothing.  */
34599 if (predicate_list && i == NUM_FEATURES)
34601 error_at (DECL_SOURCE_LOCATION (decl),
34602 "No dispatcher found for %s", token);
34605 token = strtok (NULL, ",");
34609 if (predicate_list && predicate_chain == NULL_TREE)
34611 error_at (DECL_SOURCE_LOCATION (decl),
34612 "No dispatcher found for the versioning attributes : %s",
/* Predicates were chained in reverse; restore source order before
   handing them back to the caller.  */
34616 else if (predicate_list)
34618 predicate_chain = nreverse (predicate_chain);
34619 *predicate_list = predicate_chain;
34625 /* This compares the priority of target features in function DECL1
34626 and DECL2. It returns positive value if DECL1 is higher priority,
34627 negative value if DECL2 is higher priority and 0 if they are the
34631 ix86_compare_version_priority (tree decl1, tree decl2)
34633 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
34634 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
34636 return (int)priority1 - (int)priority2;
34639 /* V1 and V2 point to function versions with different priorities
34640 based on the target ISA. This function compares their priorities. */
34643 feature_compare (const void *v1, const void *v2)
34645 typedef struct _function_version_info
34648 tree predicate_chain;
34649 unsigned int dispatch_priority;
34650 } function_version_info;
34652 const function_version_info c1 = *(const function_version_info *)v1;
34653 const function_version_info c2 = *(const function_version_info *)v2;
34654 return (c2.dispatch_priority - c1.dispatch_priority);
34657 /* This function generates the dispatch function for
34658 multi-versioned functions. DISPATCH_DECL is the function which will
34659 contain the dispatch logic. FNDECLS are the function choices for
34660 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
34661 in DISPATCH_DECL in which the dispatch code is generated. */
34664 dispatch_function_versions (tree dispatch_decl,
34666 basic_block *empty_bb)
34669 gimple ifunc_cpu_init_stmt;
34673 vec<tree> *fndecls;
34674 unsigned int num_versions = 0;
34675 unsigned int actual_versions = 0;
/* One record per non-default version; sorted by feature_compare,
   which mirrors this struct layout.  */
34678 struct _function_version_info
34681 tree predicate_chain;
34682 unsigned int dispatch_priority;
34683 }*function_version_info;
34685 gcc_assert (dispatch_decl != NULL
34686 && fndecls_p != NULL
34687 && empty_bb != NULL);
34689 /*fndecls_p is actually a vector. */
34690 fndecls = static_cast<vec<tree> *> (fndecls_p);
34692 /* At least one more version other than the default. */
34693 num_versions = fndecls->length ();
34694 gcc_assert (num_versions >= 2);
34696 function_version_info = (struct _function_version_info *)
34697 XNEWVEC (struct _function_version_info, (num_versions - 1));
34699 /* The first version in the vector is the default decl. */
34700 default_decl = (*fndecls)[0];
34702 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
34704 gseq = bb_seq (*empty_bb);
34705 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
34706 constructors, so explicity call __builtin_cpu_init here. */
34707 ifunc_cpu_init_stmt = gimple_build_call_vec (
34708 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
34709 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
34710 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
34711 set_bb_seq (*empty_bb, gseq);
/* Collect (decl, predicates, priority) for every non-default
   version, skipping index 0 (the default).  */
34716 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
34718 tree version_decl = ele;
34719 tree predicate_chain = NULL_TREE;
34720 unsigned int priority;
34721 /* Get attribute string, parse it and find the right predicate decl.
34722 The predicate function could be a lengthy combination of many
34723 features, like arch-type and various isa-variants. */
34724 priority = get_builtin_code_for_version (version_decl,
/* A version with no predicates cannot be dispatched; skip it.  */
34727 if (predicate_chain == NULL_TREE)
34730 function_version_info [actual_versions].version_decl = version_decl;
34731 function_version_info [actual_versions].predicate_chain
34733 function_version_info [actual_versions].dispatch_priority = priority;
34737 /* Sort the versions according to descending order of dispatch priority. The
34738 priority is based on the ISA. This is not a perfect solution. There
34739 could still be ambiguity. If more than one function version is suitable
34740 to execute, which one should be dispatched? In future, allow the user
34741 to specify a dispatch priority next to the version. */
34742 qsort (function_version_info, actual_versions,
34743 sizeof (struct _function_version_info), feature_compare);
/* Emit one guarded return per version; each call threads *empty_bb
   forward to the block where the next condition goes.  */
34745 for (i = 0; i < actual_versions; ++i)
34746 *empty_bb = add_condition_to_bb (dispatch_decl,
34747 function_version_info[i].version_decl,
34748 function_version_info[i].predicate_chain,
34751 /* dispatch default version at the end. */
34752 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
34755 free (function_version_info);
/* Comparator function to be used in qsort routine to sort attribute
   specification strings to "target".  V1 and V2 point at char*
   elements of the array being sorted; returns <0, 0, >0 per strcmp
   on the pointed-to strings.  */

static int
attr_strcmp (const void *v1, const void *v2)
{
  const char *c1 = *(char *const*)v1;
  const char *c2 = *(char *const*)v2;
  return strcmp (c1, c2);
}
34770 /* ARGLIST is the argument to target attribute. This function tokenizes
34771 the comma separated arguments, sorts them and returns a string which
34772 is a unique identifier for the comma separated arguments. It also
34773 replaces non-identifier characters "=,-" with "_". */
34776 sorted_attr_string (tree arglist)
34779 size_t str_len_sum = 0;
34780 char **args = NULL;
34781 char *attr_str, *ret_str;
34783 unsigned int argnum = 1;
/* Pass 1: total up lengths (plus separators) and count the
   comma-separated tokens across all strings in ARGLIST.
   NOTE(review): the argnum increments inside these loops are elided
   in this listing.  */
34786 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34788 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34789 size_t len = strlen (str);
34790 str_len_sum += len + 1;
34791 if (arg != arglist)
34793 for (i = 0; i < strlen (str); i++)
34798 attr_str = XNEWVEC (char, str_len_sum);
/* Pass 2: concatenate all strings into attr_str, comma-separated.
   NOTE(review): str_len_sum is reset to 0 before this loop on an
   elided line — confirm against the full source.  */
34800 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34802 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34803 size_t len = strlen (str);
34804 memcpy (attr_str + str_len_sum, str, len);
34805 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
34806 str_len_sum += len + 1;
/* '=' and '-' are not valid identifier characters; ',' is handled by
   the strtok split below.  */
34809 /* Replace "=,-" with "_". */
34810 for (i = 0; i < strlen (attr_str); i++)
34811 if (attr_str[i] == '=' || attr_str[i]== '-')
34817 args = XNEWVEC (char *, argnum)
;
/* Split attr_str into tokens (strtok mutates attr_str in place; the
   args[] pointers alias into it) and sort them.  */
34820 attr = strtok (attr_str, ",");
34821 while (attr != NULL)
34825 attr = strtok (NULL, ",");
34828 qsort (args, argnum, sizeof (char *), attr_strcmp);
/* Rebuild the sorted tokens into ret_str, '_'-separated; caller owns
   and must XDELETEVEC the result.  */
34830 ret_str = XNEWVEC (char, str_len_sum);
34832 for (i = 0; i < argnum; i++)
34834 size_t len = strlen (args[i]);
34835 memcpy (ret_str + str_len_sum, args[i], len);
34836 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
34837 str_len_sum += len + 1;
34841 XDELETEVEC (attr_str);
34845 /* This function changes the assembler name for functions that are
34846 versions. If DECL is a function version and has a "target"
34847 attribute, it appends the attribute string to its assembler name. */
34850 ix86_mangle_function_version_assembler_name (tree decl, tree id)
34853 const char *orig_name, *version_string;
34854 char *attr_str, *assembler_name;
/* gnu_inline versions never get a body emitted, which multiversioning
   requires — reject them.  */
34856 if (DECL_DECLARED_INLINE_P (decl)
34857 && lookup_attribute ("gnu_inline",
34858 DECL_ATTRIBUTES (decl)))
34859 error_at (DECL_SOURCE_LOCATION (decl),
34860 "Function versions cannot be marked as gnu_inline,"
34861 " bodies have to be generated");
34863 if (DECL_VIRTUAL_P (decl)
34864 || DECL_VINDEX (decl))
34865 sorry ("Virtual function multiversioning not supported");
34867 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34869 /* target attribute string cannot be NULL. */
34870 gcc_assert (version_attr != NULL_TREE);
34872 orig_name = IDENTIFIER_POINTER (id);
34874 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
/* The default version keeps its original assembler name; the early
   return here is on an elided line.  */
34876 if (strcmp (version_string, "default") == 0)
/* Non-default: append ".<sorted-attrs>" to the assembler name
   (+2: one '.' plus the NUL terminator).  */
34879 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
34880 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
34882 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
34884 /* Allow assembler name to be modified if already set. */
34885 if (DECL_ASSEMBLER_NAME_SET_P (decl))
34886 SET_DECL_RTL (decl, NULL);
34888 tree ret = get_identifier (assembler_name);
34889 XDELETEVEC (attr_str);
34890 XDELETEVEC (assembler_name);
34894 /* This function returns true if FN1 and FN2 are versions of the same function,
34895 that is, the target strings of the function decls are different. This assumes
34896 that FN1 and FN2 have the same signature. */
34899 ix86_function_versions (tree fn1, tree fn2)
34902 char *target1, *target2;
34905 if (TREE_CODE (fn1) != FUNCTION_DECL
34906 || TREE_CODE (fn2) != FUNCTION_DECL)
34909 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
34910 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
34912 /* At least one function decl should have the target attribute specified. */
34913 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
34916 /* Diagnose missing target attribute if one of the decls is already
34917 multi-versioned. */
34918 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
34920 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
/* NOTE(review): the elided lines here swap fn1/fn2 (and attr1/attr2)
   so that fn2 is always the decl missing the attribute — confirm
   against the full source.  */
34922 if (attr2 != NULL_TREE)
34929 error_at (DECL_SOURCE_LOCATION (fn2),
34930 "missing %<target%> attribute for multi-versioned %D",
34932 inform (DECL_SOURCE_LOCATION (fn1),
34933 "previous declaration of %D", fn1);
34934 /* Prevent diagnosing of the same error multiple times. */
34935 DECL_ATTRIBUTES (fn2)
34936 = tree_cons (get_identifier ("target"),
34937 copy_node (TREE_VALUE (attr1)),
34938 DECL_ATTRIBUTES (fn2));
/* Both have target attributes: versions iff their normalized
   (sorted) attribute strings differ.  */
34943 target1 = sorted_attr_string (TREE_VALUE (attr1));
34944 target2 = sorted_attr_string (TREE_VALUE (attr2));
34946 /* The sorted target strings must be different for fn1 and fn2
34948 if (strcmp (target1, target2) == 0)
34953 XDELETEVEC (target1);
34954 XDELETEVEC (target2);
34960 ix86_mangle_decl_assembler_name (tree decl, tree id)
34962 /* For function version, add the target suffix to the assembler name. */
34963 if (TREE_CODE (decl) == FUNCTION_DECL
34964 && DECL_FUNCTION_VERSIONED (decl))
34965 id = ix86_mangle_function_version_assembler_name (decl, id);
34966 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
34967 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
34973 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
34974 is true, append the full path name of the source file. */
34977 make_name (tree decl, const char *suffix, bool make_unique)
34979 char *global_var_name;
34982 const char *unique_name = NULL;
34984 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
34986 /* Get a unique name that can be used globally without any chances
34987 of collision at link time. */
34989 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
34991 name_len = strlen (name) + strlen (suffix) + 2;
34994 name_len += strlen (unique_name) + 1;
34995 global_var_name = XNEWVEC (char, name_len);
34997 /* Use '.' to concatenate names as it is demangler friendly. */
34999 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
35002 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
35004 return global_var_name;
35007 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35009 /* Make a dispatcher declaration for the multi-versioned function DECL.
35010 Calls to DECL function will be replaced with calls to the dispatcher
35011 by the front-end. Return the decl created. */
35014 make_dispatcher_decl (const tree decl)
35018 tree fn_type, func_type;
35019 bool is_uniq = false;
/* A non-public DECL needs a unique ifunc name to avoid link-time
   collisions; the is_uniq = true assignment is on an elided line.  */
35021 if (TREE_PUBLIC (decl) == 0)
35024 func_name = make_name (decl, "ifunc", is_uniq);
/* The dispatcher has the same signature as DECL.  */
35026 fn_type = TREE_TYPE (decl);
35027 func_type = build_function_type (TREE_TYPE (fn_type),
35028 TYPE_ARG_TYPES (fn_type));
35030 func_decl = build_fn_decl (func_name, func_type);
35031 XDELETEVEC (func_name);
35032 TREE_USED (func_decl) = 1;
35033 DECL_CONTEXT (func_decl) = NULL_TREE;
35034 DECL_INITIAL (func_decl) = error_mark_node;
35035 DECL_ARTIFICIAL (func_decl) = 1;
35036 /* Mark this func as external, the resolver will flip it again if
35037 it gets generated. */
35038 DECL_EXTERNAL (func_decl) = 1;
35039 /* This will be of type IFUNCs have to be externally visible. */
35040 TREE_PUBLIC (func_decl) = 1;
35047 /* Returns true if decl is multi-versioned and DECL is the default function,
35048 that is it is not tagged with target specific optimization. */
35051 is_function_default_version (const tree decl)
35053 if (TREE_CODE (decl) != FUNCTION_DECL
35054 || !DECL_FUNCTION_VERSIONED (decl))
35056 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
35058 attr = TREE_VALUE (TREE_VALUE (attr));
35059 return (TREE_CODE (attr) == STRING_CST
35060 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
35063 /* Make a dispatcher declaration for the multi-versioned function DECL.
35064 Calls to DECL function will be replaced with calls to the dispatcher
35065 by the front-end. Returns the decl of the dispatcher function. */
35068 ix86_get_function_versions_dispatcher (void *decl)
35070 tree fn = (tree) decl;
35071 struct cgraph_node *node = NULL;
35072 struct cgraph_node *default_node = NULL;
35073 struct cgraph_function_version_info *node_v = NULL;
35074 struct cgraph_function_version_info *first_v = NULL;
35076 tree dispatch_decl = NULL;
35078 struct cgraph_function_version_info *default_version_info = NULL;
35080 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
35082 node = cgraph_node::get (fn);
35083 gcc_assert (node != NULL);
35085 node_v = node->function_version ();
35086 gcc_assert (node_v != NULL);
/* A dispatcher already built for this version set is cached on the
   version-info record.  */
35088 if (node_v->dispatcher_resolver != NULL)
35089 return node_v->dispatcher_resolver;
35091 /* Find the default version and make it the first node. */
/* NOTE(review): first_v is initialized from node_v on an elided
   line before this walk.  */
35093 /* Go to the beginning of the chain. */
35094 while (first_v->prev != NULL)
35095 first_v = first_v->prev;
35096 default_version_info = first_v;
/* Scan forward for the version whose target attribute is
   "default"; the loop break on a match is elided.  */
35097 while (default_version_info != NULL)
35099 if (is_function_default_version
35100 (default_version_info->this_node->decl))
35102 default_version_info = default_version_info->next;
35105 /* If there is no default node, just return NULL. */
35106 if (default_version_info == NULL)
35109 /* Make default info the first node. */
/* Standard doubly-linked-list unlink of default_version_info
   followed by re-insertion at the head (first_v).  */
35110 if (first_v != default_version_info)
35112 default_version_info->prev->next = default_version_info->next;
35113 if (default_version_info->next)
35114 default_version_info->next->prev = default_version_info->prev;
35115 first_v->prev = default_version_info;
35116 default_version_info->next = first_v;
35117 default_version_info->prev = NULL;
35120 default_node = default_version_info->this_node;
35122 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35123 if (targetm.has_ifunc_p ())
35125 struct cgraph_function_version_info *it_v = NULL;
35126 struct cgraph_node *dispatcher_node = NULL;
35127 struct cgraph_function_version_info *dispatcher_version_info = NULL;
35129 /* Right now, the dispatching is done via ifunc. */
35130 dispatch_decl = make_dispatcher_decl (default_node->decl);
35132 dispatcher_node = cgraph_node::get_create (dispatch_decl);
35133 gcc_assert (dispatcher_node != NULL);
35134 dispatcher_node->dispatcher_function = 1;
35135 dispatcher_version_info
35136 = dispatcher_node->insert_new_function_version ();
35137 dispatcher_version_info->next = default_version_info;
35138 dispatcher_node->definition = 1;
35140 /* Set the dispatcher for all the versions. */
35141 it_v = default_version_info;
35142 while (it_v != NULL)
35144 it_v->dispatcher_resolver = dispatch_decl;
/* Else branch (no ifunc support on this target): hard error, since
   multiversion dispatch cannot be emitted.  */
35151 error_at (DECL_SOURCE_LOCATION (default_node->decl),
35152 "multiversioning needs ifunc which is not supported "
35156 return dispatch_decl;
35159 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
35163 make_attribute (const char *name, const char *arg_name, tree chain)
35166 tree attr_arg_name;
35170 attr_name = get_identifier (name);
35171 attr_arg_name = build_string (strlen (arg_name), arg_name);
35172 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
35173 attr = tree_cons (attr_name, attr_args, chain);
35177 /* Make the resolver function decl to dispatch the versions of
35178 a multi-versioned function, DEFAULT_DECL. Create an
35179 empty basic block in the resolver and store the pointer in
35180 EMPTY_BB. Return the decl of the resolver function. */
35183 make_resolver_func (const tree default_decl,
35184 const tree dispatch_decl,
35185 basic_block *empty_bb)
35187 char *resolver_name;
35188 tree decl, type, decl_name, t;
35189 bool is_uniq = false;
35191 /* IFUNC's have to be globally visible. So, if the default_decl is
35192 not, then the name of the IFUNC should be made unique. */
/* NOTE(review): the is_uniq = true assignment is on an elided line.  */
35193 if (TREE_PUBLIC (default_decl) == 0)
35196 /* Append the filename to the resolver function if the versions are
35197 not externally visible. This is because the resolver function has
35198 to be externally visible for the loader to find it. So, appending
35199 the filename will prevent conflicts with a resolver function from
35200 another module which is based on the same version name. */
35201 resolver_name = make_name (default_decl, "resolver", is_uniq);
35203 /* The resolver function should return a (void *). */
35204 type = build_function_type_list (ptr_type_node, NULL_TREE);
35206 decl = build_fn_decl (resolver_name, type);
35207 decl_name = get_identifier (resolver_name);
35208 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
35210 DECL_NAME (decl) = decl_name;
35211 TREE_USED (decl) = 1;
35212 DECL_ARTIFICIAL (decl) = 1;
35213 DECL_IGNORED_P (decl) = 0;
35214 /* IFUNC resolvers have to be externally visible. */
35215 TREE_PUBLIC (decl) = 1;
35216 DECL_UNINLINABLE (decl) = 1;
35218 /* Resolver is not external, body is generated. */
35219 DECL_EXTERNAL (decl) = 0;
35220 DECL_EXTERNAL (dispatch_decl) = 0;
35222 DECL_CONTEXT (decl) = NULL_TREE;
35223 DECL_INITIAL (decl) = make_node (BLOCK);
35224 DECL_STATIC_CONSTRUCTOR (decl) = 0;
35226 if (DECL_COMDAT_GROUP (default_decl)
35227 || TREE_PUBLIC (default_decl))
35229 /* In this case, each translation unit with a call to this
35230 versioned function will put out a resolver. Ensure it
35231 is comdat to keep just one copy. */
35232 DECL_COMDAT (decl) = 1;
35233 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
35235 /* Build result decl and add to function_decl. */
35236 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
35237 DECL_ARTIFICIAL (t) = 1;
35238 DECL_IGNORED_P (t) = 1;
35239 DECL_RESULT (decl) = t;
35241 gimplify_function_tree (decl);
35242 push_cfun (DECL_STRUCT_FUNCTION (decl));
/* Give the resolver an empty lowered body; dispatch code is added
   into *empty_bb by the caller.  NOTE(review): the matching pop_cfun
   is on an elided line.  */
35243 *empty_bb = init_lowered_empty_function (decl, false, 0);
35245 cgraph_node::add_new_function (decl, true);
35246 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
35250 gcc_assert (dispatch_decl != NULL);
35251 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
35252 DECL_ATTRIBUTES (dispatch_decl)
35253 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
35255 /* Create the alias for dispatch to resolver here. */
35256 /*cgraph_create_function_alias (dispatch_decl, decl);*/
35257 cgraph_node::create_same_body_alias (dispatch_decl, decl);
35258 XDELETEVEC (resolver_name);
35262 /* Generate the dispatching code body to dispatch multi-versioned function
35263 DECL. The target hook is called to process the "target" attributes and
35264 provide the code to dispatch the right function at run-time. NODE points
35265 to the dispatcher decl whose body will be created. */
35268 ix86_generate_version_dispatcher_body (void *node_p)
35270 tree resolver_decl;
35271 basic_block empty_bb;
35272 tree default_ver_decl;
35273 struct cgraph_node *versn;
35274 struct cgraph_node *node;
35276 struct cgraph_function_version_info *node_version_info = NULL;
35277 struct cgraph_function_version_info *versn_info = NULL;
/* NODE_P arrives through the target hook as a void pointer; it is really
   a cgraph_node for the dispatcher, as the cast below shows.  */
35279 node = (cgraph_node *)node_p;
35281 node_version_info = node->function_version ();
35282 gcc_assert (node->dispatcher_function
35283 && node_version_info != NULL);
/* A resolver may already have been built on a previous call; reuse it.  */
35285 if (node_version_info->dispatcher_resolver)
35286 return node_version_info->dispatcher_resolver;
35288 /* The first version in the chain corresponds to the default version. */
35289 default_ver_decl = node_version_info->next->this_node->decl;
35291 /* node is going to be an alias, so remove the finalized bit. */
35292 node->definition = false;
35294 resolver_decl = make_resolver_func (default_ver_decl,
35295 node->decl, &empty_bb);
35297 node_version_info->dispatcher_resolver = resolver_decl;
35299 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
/* Collect every function version reachable from the version-info chain
   so dispatch_function_versions can emit the runtime selection code.  */
35301 auto_vec<tree, 2> fn_ver_vec;
35303 for (versn_info = node_version_info->next; versn_info;
35304 versn_info = versn_info->next)
35306 versn = versn_info->this_node;
35307 /* Check for virtual functions here again, as by this time it should
35308 have been determined if this function needs a vtable index or
35309 not. This happens for methods in derived classes that override
35310 virtual methods in base classes but are not explicitly marked as
35312 if (DECL_VINDEX (versn->decl))
35313 sorry ("Virtual function multiversioning not supported");
35315 fn_ver_vec.safe_push (versn->decl);
35318 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
35319 cgraph_edge::rebuild_edges ();
/* NOTE(review): the matching pop_cfun for the push_cfun above is not
   visible in this excerpt -- confirm it exists in the full source.  */
35321 return resolver_decl;
35323 /* This builds the processor_model struct type defined in
35324 libgcc/config/i386/cpuinfo.c */
35327 build_processor_model_struct (void)
/* NOTE(review): field_name's trailing entry ("__cpu_features"?) is cut off
   in this excerpt; field_name[3] is referenced below -- confirm.  */
35329 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
35331 tree field = NULL_TREE, field_chain = NULL_TREE;
35333 tree type = make_node (RECORD_TYPE);
35335 /* The first 3 fields are unsigned int. */
35336 for (i = 0; i < 3; ++i)
35338 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35339 get_identifier (field_name[i]), unsigned_type_node);
35340 if (field_chain != NULL_TREE)
35341 DECL_CHAIN (field) = field_chain;
/* Fields are chained newest-first; field_chain is the running head.  */
35342 field_chain = field;
35345 /* The last field is an array of unsigned integers of size one. */
35346 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35347 get_identifier (field_name[3]),
35348 build_array_type (unsigned_type_node,
35349 build_index_type (size_one_node)));
35350 if (field_chain != NULL_TREE)
35351 DECL_CHAIN (field) = field_chain;
35352 field_chain = field;
/* finish_builtin_struct lays out the record and names it to match the
   runtime __processor_model defined in libgcc.  */
35354 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
35358 /* Returns a extern, comdat VAR_DECL of type TYPE and name NAME. */
35361 make_var_decl (tree type, const char *name)
35365 new_decl = build_decl (UNKNOWN_LOCATION,
35367 get_identifier(name),
/* Extern + static + public with no initializer: this declares a reference
   to a variable whose definition lives in libgcc's cpuinfo.c.  */
35370 DECL_EXTERNAL (new_decl) = 1;
35371 TREE_STATIC (new_decl) = 1;
35372 TREE_PUBLIC (new_decl) = 1;
35373 DECL_INITIAL (new_decl) = 0;
35374 DECL_ARTIFICIAL (new_decl) = 0;
/* DECL_PRESERVE_P keeps the decl from being discarded as unused.  */
35375 DECL_PRESERVE_P (new_decl) = 1;
/* Comdat/one-only so multiple TUs referencing it keep a single copy.  */
35377 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
35378 assemble_variable (new_decl, 0, 0, 0);
35383 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
35384 into an integer defined in libgcc/config/i386/cpuinfo.c */
35387 fold_builtin_cpu (tree fndecl, tree *args)
35390 enum ix86_builtins fn_code = (enum ix86_builtins)
35391 DECL_FUNCTION_CODE (fndecl);
35392 tree param_string_cst = NULL;
35394 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
/* NOTE(review): most enumerators of processor_features are cut from this
   excerpt; only the F_* names used in isa_names_table below are visible.  */
35395 enum processor_features
35418 /* These are the values for vendor types and cpu types and subtypes
35419 in cpuinfo.c. Cpu types and subtypes should be subtracted by
35420 the corresponding start value. */
/* NOTE(review): the leading enumerators (M_INTEL, M_CPU_TYPE_START, ...) are
   cut from this excerpt; they must match libgcc/config/i386/cpuinfo.c.  */
35421 enum processor_model
35431 M_INTEL_SILVERMONT,
35435 M_CPU_SUBTYPE_START,
35436 M_INTEL_COREI7_NEHALEM,
35437 M_INTEL_COREI7_WESTMERE,
35438 M_INTEL_COREI7_SANDYBRIDGE,
35439 M_AMDFAM10H_BARCELONA,
35440 M_AMDFAM10H_SHANGHAI,
35441 M_AMDFAM10H_ISTANBUL,
35442 M_AMDFAM15H_BDVER1,
35443 M_AMDFAM15H_BDVER2,
35444 M_AMDFAM15H_BDVER3,
35445 M_AMDFAM15H_BDVER4,
35446 M_INTEL_COREI7_IVYBRIDGE,
35447 M_INTEL_COREI7_HASWELL,
35448 M_INTEL_COREI7_BROADWELL
/* Maps the strings accepted by __builtin_cpu_is to processor_model values.  */
35451 static struct _arch_names_table
35453 const char *const name;
35454 const enum processor_model model;
35456 const arch_names_table[] =
35459 {"intel", M_INTEL},
35460 {"atom", M_INTEL_BONNELL},
35461 {"slm", M_INTEL_SILVERMONT},
35462 {"core2", M_INTEL_CORE2},
35463 {"corei7", M_INTEL_COREI7},
35464 {"nehalem", M_INTEL_COREI7_NEHALEM},
35465 {"westmere", M_INTEL_COREI7_WESTMERE},
35466 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
35467 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
35468 {"haswell", M_INTEL_COREI7_HASWELL},
35469 {"broadwell", M_INTEL_COREI7_BROADWELL},
35470 {"bonnell", M_INTEL_BONNELL},
35471 {"silvermont", M_INTEL_SILVERMONT},
35472 {"knl", M_INTEL_KNL},
35473 {"amdfam10h", M_AMDFAM10H},
35474 {"barcelona", M_AMDFAM10H_BARCELONA},
35475 {"shanghai", M_AMDFAM10H_SHANGHAI},
35476 {"istanbul", M_AMDFAM10H_ISTANBUL},
35477 {"btver1", M_AMD_BTVER1},
35478 {"amdfam15h", M_AMDFAM15H},
35479 {"bdver1", M_AMDFAM15H_BDVER1},
35480 {"bdver2", M_AMDFAM15H_BDVER2},
35481 {"bdver3", M_AMDFAM15H_BDVER3},
35482 {"bdver4", M_AMDFAM15H_BDVER4},
35483 {"btver2", M_AMD_BTVER2},
/* Maps the strings accepted by __builtin_cpu_supports to feature bits.  */
35486 static struct _isa_names_table
35488 const char *const name;
35489 const enum processor_features feature;
35491 const isa_names_table[] =
35495 {"popcnt", F_POPCNT},
35499 {"ssse3", F_SSSE3},
35500 {"sse4a", F_SSE4_A},
35501 {"sse4.1", F_SSE4_1},
35502 {"sse4.2", F_SSE4_2},
35508 {"avx512f",F_AVX512F},
/* Declare the libgcc-defined __cpu_model variable so the builtin can be
   folded into a direct field read of it.  */
35513 tree __processor_model_type = build_processor_model_struct ();
35514 tree __cpu_model_var = make_var_decl (__processor_model_type,
35518 varpool_node::add (__cpu_model_var);
35520 gcc_assert ((args != NULL) && (*args != NULL));
35522 param_string_cst = *args;
/* Walk through wrapper expressions (e.g. NOP_EXPRs) until the underlying
   STRING_CST argument is found.  */
35523 while (param_string_cst
35524 && TREE_CODE (param_string_cst) != STRING_CST)
35526 /* *args must be a expr that can contain other EXPRS leading to a
35528 if (!EXPR_P (param_string_cst))
35530 error ("Parameter to builtin must be a string constant or literal");
35531 return integer_zero_node;
35533 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
35536 gcc_assert (param_string_cst);
35538 if (fn_code == IX86_BUILTIN_CPU_IS)
35544 unsigned int field_val = 0;
35545 unsigned int NUM_ARCH_NAMES
35546 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
35548 for (i = 0; i < NUM_ARCH_NAMES; i++)
35549 if (strcmp (arch_names_table[i].name,
35550 TREE_STRING_POINTER (param_string_cst)) == 0)
35553 if (i == NUM_ARCH_NAMES)
35555 error ("Parameter to builtin not valid: %s",
35556 TREE_STRING_POINTER (param_string_cst));
35557 return integer_zero_node;
35560 field = TYPE_FIELDS (__processor_model_type);
35561 field_val = arch_names_table[i].model;
35563 /* CPU types are stored in the next field. */
35564 if (field_val > M_CPU_TYPE_START
35565 && field_val < M_CPU_SUBTYPE_START)
35567 field = DECL_CHAIN (field);
/* Values are stored relative to the start of their category.  */
35568 field_val -= M_CPU_TYPE_START;
35571 /* CPU subtypes are stored in the next field. */
35572 if (field_val > M_CPU_SUBTYPE_START)
35574 field = DECL_CHAIN ( DECL_CHAIN (field));
35575 field_val -= M_CPU_SUBTYPE_START;
35578 /* Get the appropriate field in __cpu_model. */
35579 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35582 /* Check the value. */
/* The builtin folds to (int) (__cpu_model.<field> == field_val).  */
35583 final = build2 (EQ_EXPR, unsigned_type_node, ref,
35584 build_int_cstu (unsigned_type_node, field_val));
35585 return build1 (CONVERT_EXPR, integer_type_node, final);
35587 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35594 unsigned int field_val = 0;
35595 unsigned int NUM_ISA_NAMES
35596 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
35598 for (i = 0; i < NUM_ISA_NAMES; i++)
35599 if (strcmp (isa_names_table[i].name,
35600 TREE_STRING_POINTER (param_string_cst)) == 0)
35603 if (i == NUM_ISA_NAMES)
35605 error ("Parameter to builtin not valid: %s",
35606 TREE_STRING_POINTER (param_string_cst));
35607 return integer_zero_node;
35610 field = TYPE_FIELDS (__processor_model_type);
35611 /* Get the last field, which is __cpu_features. */
35612 while (DECL_CHAIN (field))
35613 field = DECL_CHAIN (field);
35615 /* Get the appropriate field: __cpu_model.__cpu_features */
35616 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35619 /* Access the 0th element of __cpu_features array. */
35620 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
35621 integer_zero_node, NULL_TREE, NULL_TREE);
/* The feature enum value is a bit position within __cpu_features[0].  */
35623 field_val = (1 << isa_names_table[i].feature);
35624 /* Return __cpu_model.__cpu_features[0] & field_val */
35625 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
35626 build_int_cstu (unsigned_type_node, field_val));
35627 return build1 (CONVERT_EXPR, integer_type_node, final);
/* Any other builtin code reaching this function is a caller bug.  */
35629 gcc_unreachable ();
/* Implementation of the TARGET_FOLD_BUILTIN hook: fold the cpu-detection
   builtins at compile time; defer everything else to the subtarget.  */
35633 ix86_fold_builtin (tree fndecl, int n_args,
35634 tree *args, bool ignore ATTRIBUTE_UNUSED)
35636 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35638 enum ix86_builtins fn_code = (enum ix86_builtins)
35639 DECL_FUNCTION_CODE (fndecl);
/* Only __builtin_cpu_is / __builtin_cpu_supports are folded here; both
   take exactly one (string) argument.  */
35640 if (fn_code == IX86_BUILTIN_CPU_IS
35641 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35643 gcc_assert (n_args == 1);
35644 return fold_builtin_cpu (fndecl, args);
35648 #ifdef SUBTARGET_FOLD_BUILTIN
35649 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
35655 /* Make builtins to detect cpu type and features supported. NAME is
35656 the builtin name, CODE is the builtin code, and FTYPE is the function
35657 type of the builtin. */
35660 make_cpu_type_builtin (const char* name, int code,
35661 enum ix86_builtin_func_type ftype, bool is_const)
35666 type = ix86_get_builtin_func_type (ftype);
35667 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
35669 gcc_assert (decl != NULL_TREE);
35670 ix86_builtins[(int) code] = decl;
35671 TREE_READONLY (decl) = is_const;
35674 /* Make builtins to get CPU type and features supported. The created
35677 __builtin_cpu_init (), to detect cpu type and features,
35678 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
35679 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
35683 ix86_init_platform_type_builtins (void)
35685 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
35686 INT_FTYPE_VOID, false);
35687 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
35688 INT_FTYPE_PCCHAR, true);
35689 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
35690 INT_FTYPE_PCCHAR, true);
35693 /* Internal method for ix86_init_builtins. */
/* Registers the ms_abi / sysv_abi variants of va_start/va_end/va_copy.
   NOTE(review): the early-exit guard (likely a !TARGET_64BIT return) that
   precedes the attribute setup is not visible in this excerpt -- confirm.  */
35696 ix86_init_builtins_va_builtins_abi (void)
35698 tree ms_va_ref, sysv_va_ref;
35699 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
35700 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
35701 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
35702 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
/* Each builtin gets the matching calling-convention attribute so it is
   expanded with the right ABI.  */
35706 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
35707 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
35708 ms_va_ref = build_reference_type (ms_va_list_type_node);
35710 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
/* Build the function types: va_start variants are varargs, the others
   take just the va_list reference(s).  */
35713 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35714 fnvoid_va_start_ms =
35715 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35716 fnvoid_va_end_sysv =
35717 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
35718 fnvoid_va_start_sysv =
35719 build_varargs_function_type_list (void_type_node, sysv_va_ref,
35721 fnvoid_va_copy_ms =
35722 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
35724 fnvoid_va_copy_sysv =
35725 build_function_type_list (void_type_node, sysv_va_ref,
35726 sysv_va_ref, NULL_TREE);
35728 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
35729 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
35730 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
35731 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
35732 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
35733 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
35734 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
35735 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35736 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
35737 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35738 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
35739 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
/* Register the x86-specific scalar types (__float80, __float128) and the
   primitive types used by the builtin machinery.  */
35743 ix86_init_builtin_types (void)
35745 tree float128_type_node, float80_type_node;
35747 /* The __float80 type. */
/* If long double is already the 80-bit XFmode type, reuse it; otherwise
   build a distinct 80-bit REAL_TYPE.  */
35748 float80_type_node = long_double_type_node;
35749 if (TYPE_MODE (float80_type_node) != XFmode)
35751 /* The __float80 type. */
35752 float80_type_node = make_node (REAL_TYPE);
35754 TYPE_PRECISION (float80_type_node) = 80;
35755 layout_type (float80_type_node);
35757 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
35759 /* The __float128 type. */
35760 float128_type_node = make_node (REAL_TYPE);
35761 TYPE_PRECISION (float128_type_node) = 128;
35762 layout_type (float128_type_node);
35763 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
35765 /* This macro is built by i386-builtin-types.awk. */
35766 DEFINE_BUILTIN_PRIMITIVE_TYPES;
/* Top-level TARGET_INIT_BUILTINS entry point: registers all x86 builtin
   types and functions in dependency order.  */
35770 ix86_init_builtins (void)
35774 ix86_init_builtin_types ();
35776 /* Builtins to get CPU type and features. */
35777 ix86_init_platform_type_builtins ();
35779 /* TFmode support builtins. */
35780 def_builtin_const (0, "__builtin_infq",
35781 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
35782 def_builtin_const (0, "__builtin_huge_valq",
35783 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
35785 /* We will expand them to normal call if SSE isn't available since
35786 they are used by libgcc. */
/* fabsq/copysignq carry libgcc fallback names (__fabstf2/__copysigntf3)
   so they can be emitted as ordinary calls.  */
35787 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
35788 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
35789 BUILT_IN_MD, "__fabstf2", NULL_TREE);
35790 TREE_READONLY (t) = 1;
35791 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
35793 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
35794 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
35795 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
35796 TREE_READONLY (t) = 1;
35797 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
35799 ix86_init_tm_builtins ();
35800 ix86_init_mmx_sse_builtins ();
35801 ix86_init_mpx_builtins ();
35804 ix86_init_builtins_va_builtins_abi ();
35806 #ifdef SUBTARGET_INIT_BUILTINS
35807 SUBTARGET_INIT_BUILTINS;
35811 /* Return the ix86 builtin for CODE. */
35814 ix86_builtin_decl (unsigned code, bool)
35816 if (code >= IX86_BUILTIN_MAX)
35817 return error_mark_node;
35819 return ix86_builtins[code];
35822 /* Errors in the source file can cause expand_expr to return const0_rtx
35823 where we expect a vector. To avoid crashing, use one of the vector
35824 clear instructions. */
35826 safe_vector_operand (rtx x, machine_mode mode)
35828 if (x == const0_rtx)
35829 x = CONST0_RTX (mode);
35833 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
35836 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
35839 tree arg0 = CALL_EXPR_ARG (exp, 0);
35840 tree arg1 = CALL_EXPR_ARG (exp, 1);
35841 rtx op0 = expand_normal (arg0);
35842 rtx op1 = expand_normal (arg1);
/* Operand modes come from the insn pattern: operand 0 is the result.  */
35843 machine_mode tmode = insn_data[icode].operand[0].mode;
35844 machine_mode mode0 = insn_data[icode].operand[1].mode;
35845 machine_mode mode1 = insn_data[icode].operand[2].mode;
35847 if (VECTOR_MODE_P (mode0))
35848 op0 = safe_vector_operand (op0, mode0);
35849 if (VECTOR_MODE_P (mode1))
35850 op1 = safe_vector_operand (op1, mode1);
/* Allocate a fresh pseudo when the caller's target is unusable (wrong
   mode, fails the predicate, or when optimizing).  */
35852 if (optimize || !target
35853 || GET_MODE (target) != tmode
35854 || !insn_data[icode].operand[0].predicate (target, tmode))
35855 target = gen_reg_rtx (tmode);
/* Widen a 32-bit op1 to TImode via an SSE2 scalar load when the pattern
   wants a TImode operand.  */
35857 if (GET_MODE (op1) == SImode && mode1 == TImode)
35859 rtx x = gen_reg_rtx (V4SImode);
35860 emit_insn (gen_sse2_loadd (x, op1));
35861 op1 = gen_lowpart (TImode, x);
35864 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35865 op0 = copy_to_mode_reg (mode0, op0);
35866 if (!insn_data[icode].operand[2].predicate (op1, mode1))
35867 op1 = copy_to_mode_reg (mode1, op1);
/* NOTE(review): the emit_insn (pat) / return target tail of this function
   is not visible in this excerpt.  */
35869 pat = GEN_FCN (icode) (target, op0, op1);
35878 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
35881 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
35882 enum ix86_builtin_func_type m_type,
35883 enum rtx_code sub_code)
35888 bool comparison_p = false;
35890 bool last_arg_constant = false;
35891 int num_memory = 0;
35897 machine_mode tmode = insn_data[icode].operand[0].mode;
/* Classify M_TYPE: it determines the argument count and whether the last
   argument must be a constant and/or the insn is a comparison.
   NOTE(review): the switch header and the nargs assignments inside each
   case group are not visible in this excerpt.  */
35901 case MULTI_ARG_4_DF2_DI_I:
35902 case MULTI_ARG_4_DF2_DI_I1:
35903 case MULTI_ARG_4_SF2_SI_I:
35904 case MULTI_ARG_4_SF2_SI_I1:
35906 last_arg_constant = true;
35909 case MULTI_ARG_3_SF:
35910 case MULTI_ARG_3_DF:
35911 case MULTI_ARG_3_SF2:
35912 case MULTI_ARG_3_DF2:
35913 case MULTI_ARG_3_DI:
35914 case MULTI_ARG_3_SI:
35915 case MULTI_ARG_3_SI_DI:
35916 case MULTI_ARG_3_HI:
35917 case MULTI_ARG_3_HI_SI:
35918 case MULTI_ARG_3_QI:
35919 case MULTI_ARG_3_DI2:
35920 case MULTI_ARG_3_SI2:
35921 case MULTI_ARG_3_HI2:
35922 case MULTI_ARG_3_QI2:
35926 case MULTI_ARG_2_SF:
35927 case MULTI_ARG_2_DF:
35928 case MULTI_ARG_2_DI:
35929 case MULTI_ARG_2_SI:
35930 case MULTI_ARG_2_HI:
35931 case MULTI_ARG_2_QI:
35935 case MULTI_ARG_2_DI_IMM:
35936 case MULTI_ARG_2_SI_IMM:
35937 case MULTI_ARG_2_HI_IMM:
35938 case MULTI_ARG_2_QI_IMM:
35940 last_arg_constant = true;
35943 case MULTI_ARG_1_SF:
35944 case MULTI_ARG_1_DF:
35945 case MULTI_ARG_1_SF2:
35946 case MULTI_ARG_1_DF2:
35947 case MULTI_ARG_1_DI:
35948 case MULTI_ARG_1_SI:
35949 case MULTI_ARG_1_HI:
35950 case MULTI_ARG_1_QI:
35951 case MULTI_ARG_1_SI_DI:
35952 case MULTI_ARG_1_HI_DI:
35953 case MULTI_ARG_1_HI_SI:
35954 case MULTI_ARG_1_QI_DI:
35955 case MULTI_ARG_1_QI_SI:
35956 case MULTI_ARG_1_QI_HI:
35960 case MULTI_ARG_2_DI_CMP:
35961 case MULTI_ARG_2_SI_CMP:
35962 case MULTI_ARG_2_HI_CMP:
35963 case MULTI_ARG_2_QI_CMP:
35965 comparison_p = true;
35968 case MULTI_ARG_2_SF_TF:
35969 case MULTI_ARG_2_DF_TF:
35970 case MULTI_ARG_2_DI_TF:
35971 case MULTI_ARG_2_SI_TF:
35972 case MULTI_ARG_2_HI_TF:
35973 case MULTI_ARG_2_QI_TF:
35979 gcc_unreachable ();
35982 if (optimize || !target
35983 || GET_MODE (target) != tmode
35984 || !insn_data[icode].operand[0].predicate (target, tmode))
35985 target = gen_reg_rtx (tmode);
35987 gcc_assert (nargs <= 4);
/* Expand each argument; comparison insns have an extra (comparison)
   operand before the data operands, hence the adjust of 1.  */
35989 for (i = 0; i < nargs; i++)
35991 tree arg = CALL_EXPR_ARG (exp, i);
35992 rtx op = expand_normal (arg);
35993 int adjust = (comparison_p) ? 1 : 0;
35994 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
35996 if (last_arg_constant && i == nargs - 1)
35998 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
36000 enum insn_code new_icode = icode;
/* The vpermil2 selector immediate cannot be masked into range; the XOP
   rotate immediates can be reduced modulo the element width by mapping
   the XOP rotate to the generic rotate pattern.  */
36003 case CODE_FOR_xop_vpermil2v2df3:
36004 case CODE_FOR_xop_vpermil2v4sf3:
36005 case CODE_FOR_xop_vpermil2v4df3:
36006 case CODE_FOR_xop_vpermil2v8sf3:
36007 error ("the last argument must be a 2-bit immediate");
36008 return gen_reg_rtx (tmode);
36009 case CODE_FOR_xop_rotlv2di3:
36010 new_icode = CODE_FOR_rotlv2di3;
36012 case CODE_FOR_xop_rotlv4si3:
36013 new_icode = CODE_FOR_rotlv4si3;
36015 case CODE_FOR_xop_rotlv8hi3:
36016 new_icode = CODE_FOR_rotlv8hi3;
36018 case CODE_FOR_xop_rotlv16qi3:
36019 new_icode = CODE_FOR_rotlv16qi3;
36021 if (CONST_INT_P (op))
/* Reduce the rotate count modulo the element bit width.  */
36023 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
36024 op = GEN_INT (INTVAL (op) & mask);
36025 gcc_checking_assert
36026 (insn_data[icode].operand[i + 1].predicate (op, mode));
/* Verify the substituted pattern is operand-compatible with the
   original before switching to it.  */
36030 gcc_checking_assert
36032 && insn_data[new_icode].operand[0].mode == tmode
36033 && insn_data[new_icode].operand[1].mode == tmode
36034 && insn_data[new_icode].operand[2].mode == mode
36035 && insn_data[new_icode].operand[0].predicate
36036 == insn_data[icode].operand[0].predicate
36037 && insn_data[new_icode].operand[1].predicate
36038 == insn_data[icode].operand[1].predicate);
36044 gcc_unreachable ();
36051 if (VECTOR_MODE_P (mode))
36052 op = safe_vector_operand (op, mode);
36054 /* If we aren't optimizing, only allow one memory operand to be
36056 if (memory_operand (op, mode))
36059 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
36062 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
36064 op = force_reg (mode, op);
36068 args[i].mode = mode;
/* Emit the pattern with the arity established above.
   NOTE(review): the switch on nargs wrapping these GEN_FCN calls, and the
   final emit_insn/return, are not visible in this excerpt.  */
36074 pat = GEN_FCN (icode) (target, args[0].op);
36079 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
36080 GEN_INT ((int)sub_code));
36081 else if (! comparison_p)
36082 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
36085 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
36089 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
36094 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
36098 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
36102 gcc_unreachable ();
36112 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
36113 insns with vec_merge. */
36116 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
36120 tree arg0 = CALL_EXPR_ARG (exp, 0);
36121 rtx op1, op0 = expand_normal (arg0);
36122 machine_mode tmode = insn_data[icode].operand[0].mode;
36123 machine_mode mode0 = insn_data[icode].operand[1].mode;
36125 if (optimize || !target
36126 || GET_MODE (target) != tmode
36127 || !insn_data[icode].operand[0].predicate (target, tmode))
36128 target = gen_reg_rtx (tmode);
36130 if (VECTOR_MODE_P (mode0))
36131 op0 = safe_vector_operand (op0, mode0);
36133 if ((optimize && !register_operand (op0, mode0))
36134 || !insn_data[icode].operand[1].predicate (op0, mode0))
36135 op0 = copy_to_mode_reg (mode0, op0);
/* op1 duplicates op0 as the vec_merge source; its assignment (op1 = op0)
   is not visible in this excerpt.  */
36138 if (!insn_data[icode].operand[2].predicate (op1, mode0))
36139 op1 = copy_to_mode_reg (mode0, op1);
/* NOTE(review): the emit_insn (pat) / return target tail is cut off.  */
36141 pat = GEN_FCN (icode) (target, op0, op1);
36148 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
36151 ix86_expand_sse_compare (const struct builtin_description *d,
36152 tree exp, rtx target, bool swap)
36155 tree arg0 = CALL_EXPR_ARG (exp, 0);
36156 tree arg1 = CALL_EXPR_ARG (exp, 1);
36157 rtx op0 = expand_normal (arg0);
36158 rtx op1 = expand_normal (arg1);
36160 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36161 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36162 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36163 enum rtx_code comparison = d->comparison;
36165 if (VECTOR_MODE_P (mode0))
36166 op0 = safe_vector_operand (op0, mode0);
36167 if (VECTOR_MODE_P (mode1))
36168 op1 = safe_vector_operand (op1, mode1);
36170 /* Swap operands if we have a comparison that isn't available in
/* The guard testing SWAP before this std::swap is not visible here.  */
36173 std::swap (op0, op1);
36175 if (optimize || !target
36176 || GET_MODE (target) != tmode
36177 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36178 target = gen_reg_rtx (tmode);
36180 if ((optimize && !register_operand (op0, mode0))
36181 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
36182 op0 = copy_to_mode_reg (mode0, op0);
36183 if ((optimize && !register_operand (op1, mode1))
36184 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
36185 op1 = copy_to_mode_reg (mode1, op1);
/* The comparison rtx is passed as a third operand to the pattern.
   NOTE(review): the emit_insn/return tail is cut off in this excerpt.  */
36187 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
36188 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36195 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
36198 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
36202 tree arg0 = CALL_EXPR_ARG (exp, 0);
36203 tree arg1 = CALL_EXPR_ARG (exp, 1);
36204 rtx op0 = expand_normal (arg0);
36205 rtx op1 = expand_normal (arg1);
36206 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36207 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36208 enum rtx_code comparison = d->comparison;
36210 if (VECTOR_MODE_P (mode0))
36211 op0 = safe_vector_operand (op0, mode0);
36212 if (VECTOR_MODE_P (mode1))
36213 op1 = safe_vector_operand (op1, mode1);
36215 /* Swap operands if we have a comparison that isn't available in
36217 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
36218 std::swap (op0, op1);
/* Build an SImode result zeroed first, then write the comparison outcome
   into its low QImode part via STRICT_LOW_PART below.  */
36220 target = gen_reg_rtx (SImode);
36221 emit_move_insn (target, const0_rtx);
36222 target = gen_rtx_SUBREG (QImode, target, 0);
36224 if ((optimize && !register_operand (op0, mode0))
36225 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36226 op0 = copy_to_mode_reg (mode0, op0);
36227 if ((optimize && !register_operand (op1, mode1))
36228 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36229 op1 = copy_to_mode_reg (mode1, op1);
36231 pat = GEN_FCN (d->icode) (op0, op1);
/* Set the low byte from the flags produced by the comi pattern.  */
36235 emit_insn (gen_rtx_SET (VOIDmode,
36236 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36237 gen_rtx_fmt_ee (comparison, QImode,
36241 return SUBREG_REG (target);
36244 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
36247 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
36251 tree arg0 = CALL_EXPR_ARG (exp, 0);
36252 rtx op1, op0 = expand_normal (arg0);
36253 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36254 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36256 if (optimize || target == 0
36257 || GET_MODE (target) != tmode
36258 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36259 target = gen_reg_rtx (tmode);
36261 if (VECTOR_MODE_P (mode0))
36262 op0 = safe_vector_operand (op0, mode0);
36264 if ((optimize && !register_operand (op0, mode0))
36265 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36266 op0 = copy_to_mode_reg (mode0, op0);
/* For round insns d->comparison carries the rounding-mode immediate.
   NOTE(review): the emit_insn/return tail is cut off in this excerpt.  */
36268 op1 = GEN_INT (d->comparison);
36270 pat = GEN_FCN (d->icode) (target, op0, op1);
/* Expand a two-operand round-and-pack builtin; like ix86_expand_sse_round
   but with two vector inputs.  */
36278 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
36279 tree exp, rtx target)
36282 tree arg0 = CALL_EXPR_ARG (exp, 0);
36283 tree arg1 = CALL_EXPR_ARG (exp, 1);
36284 rtx op0 = expand_normal (arg0);
36285 rtx op1 = expand_normal (arg1);
36287 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36288 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36289 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36291 if (optimize || target == 0
36292 || GET_MODE (target) != tmode
36293 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36294 target = gen_reg_rtx (tmode);
36296 op0 = safe_vector_operand (op0, mode0);
36297 op1 = safe_vector_operand (op1, mode1);
36299 if ((optimize && !register_operand (op0, mode0))
36300 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36301 op0 = copy_to_mode_reg (mode0, op0);
36302 if ((optimize && !register_operand (op1, mode1))
36303 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36304 op1 = copy_to_mode_reg (mode1, op1);
/* d->comparison carries the rounding-mode immediate here as well.
   NOTE(review): the emit_insn/return tail is cut off in this excerpt.  */
36306 op2 = GEN_INT (d->comparison);
36308 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36315 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
36318 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
36322 tree arg0 = CALL_EXPR_ARG (exp, 0);
36323 tree arg1 = CALL_EXPR_ARG (exp, 1);
36324 rtx op0 = expand_normal (arg0);
36325 rtx op1 = expand_normal (arg1);
36326 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36327 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36328 enum rtx_code comparison = d->comparison;
36330 if (VECTOR_MODE_P (mode0))
36331 op0 = safe_vector_operand (op0, mode0);
36332 if (VECTOR_MODE_P (mode1))
36333 op1 = safe_vector_operand (op1, mode1);
/* Zero an SImode result, then set its low byte from the flags produced
   by the ptest pattern (same scheme as ix86_expand_sse_comi).  */
36335 target = gen_reg_rtx (SImode);
36336 emit_move_insn (target, const0_rtx);
36337 target = gen_rtx_SUBREG (QImode, target, 0);
36339 if ((optimize && !register_operand (op0, mode0))
36340 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36341 op0 = copy_to_mode_reg (mode0, op0);
36342 if ((optimize && !register_operand (op1, mode1))
36343 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36344 op1 = copy_to_mode_reg (mode1, op1);
36346 pat = GEN_FCN (d->icode) (op0, op1);
36350 emit_insn (gen_rtx_SET (VOIDmode,
36351 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36352 gen_rtx_fmt_ee (comparison, QImode,
36356 return SUBREG_REG (target);
36359 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
36362 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
36363 tree exp, rtx target)
36366 tree arg0 = CALL_EXPR_ARG (exp, 0);
36367 tree arg1 = CALL_EXPR_ARG (exp, 1);
36368 tree arg2 = CALL_EXPR_ARG (exp, 2);
36369 tree arg3 = CALL_EXPR_ARG (exp, 3);
36370 tree arg4 = CALL_EXPR_ARG (exp, 4);
36371 rtx scratch0, scratch1;
36372 rtx op0 = expand_normal (arg0);
36373 rtx op1 = expand_normal (arg1);
36374 rtx op2 = expand_normal (arg2);
36375 rtx op3 = expand_normal (arg3);
36376 rtx op4 = expand_normal (arg4);
/* pcmpestr patterns have seven operands: two results (index / mask), two
   vector inputs with explicit lengths, and the control immediate.  */
36377 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
36379 tmode0 = insn_data[d->icode].operand[0].mode;
36380 tmode1 = insn_data[d->icode].operand[1].mode;
36381 modev2 = insn_data[d->icode].operand[2].mode;
36382 modei3 = insn_data[d->icode].operand[3].mode;
36383 modev4 = insn_data[d->icode].operand[4].mode;
36384 modei5 = insn_data[d->icode].operand[5].mode;
36385 modeimm = insn_data[d->icode].operand[6].mode;
36387 if (VECTOR_MODE_P (modev2))
36388 op0 = safe_vector_operand (op0, modev2);
36389 if (VECTOR_MODE_P (modev4))
36390 op2 = safe_vector_operand (op2, modev4);
36392 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36393 op0 = copy_to_mode_reg (modev2, op0);
36394 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
36395 op1 = copy_to_mode_reg (modei3, op1);
36396 if ((optimize && !register_operand (op2, modev4))
36397 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
36398 op2 = copy_to_mode_reg (modev4, op2);
36399 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
36400 op3 = copy_to_mode_reg (modei5, op3);
36402 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
36404 error ("the fifth argument must be an 8-bit immediate");
/* Which result the builtin wants decides which output operand becomes
   the target and which is a scratch.  */
36408 if (d->code == IX86_BUILTIN_PCMPESTRI128)
36410 if (optimize || !target
36411 || GET_MODE (target) != tmode0
36412 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36413 target = gen_reg_rtx (tmode0);
36415 scratch1 = gen_reg_rtx (tmode1);
36417 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
36419 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
36421 if (optimize || !target
36422 || GET_MODE (target) != tmode1
36423 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36424 target = gen_reg_rtx (tmode1);
36426 scratch0 = gen_reg_rtx (tmode0);
36428 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag-testing variants: both outputs are scratches and the result is a
   bit read back from the flags register recorded in d->flag.  */
36432 gcc_assert (d->flag);
36434 scratch0 = gen_reg_rtx (tmode0);
36435 scratch1 = gen_reg_rtx (tmode1);
36437 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
36447 target = gen_reg_rtx (SImode);
36448 emit_move_insn (target, const0_rtx);
36449 target = gen_rtx_SUBREG (QImode, target, 0);
36452 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36453 gen_rtx_fmt_ee (EQ, QImode,
36454 gen_rtx_REG ((machine_mode) d->flag,
36457 return SUBREG_REG (target);
36464 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* D describes the builtin being expanded (insn code, builtin code and
   flag bits), EXP is the CALL_EXPR and TARGET is a suggested result
   rtx, possibly NULL or of the wrong mode.  Returns an rtx holding the
   builtin's value.  NOTE(review): this chunk is a sparse line
   extraction -- braces, break/return statements and some surrounding
   lines are not visible here.  */
36467 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
36468 tree exp, rtx target)
36471 tree arg0 = CALL_EXPR_ARG (exp, 0);
36472 tree arg1 = CALL_EXPR_ARG (exp, 1);
36473 tree arg2 = CALL_EXPR_ARG (exp, 2);
36474 rtx scratch0, scratch1;
36475 rtx op0 = expand_normal (arg0);
36476 rtx op1 = expand_normal (arg1);
36477 rtx op2 = expand_normal (arg2);
36478 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
/* Operand modes are dictated by the insn pattern: two result modes,
   two vector input modes and the mode of the immediate control byte.  */
36480 tmode0 = insn_data[d->icode].operand[0].mode;
36481 tmode1 = insn_data[d->icode].operand[1].mode;
36482 modev2 = insn_data[d->icode].operand[2].mode;
36483 modev3 = insn_data[d->icode].operand[3].mode;
36484 modeimm = insn_data[d->icode].operand[4].mode;
/* Make sure vector operands are not MEMs the pattern would reject.  */
36486 if (VECTOR_MODE_P (modev2))
36487 op0 = safe_vector_operand (op0, modev2);
36488 if (VECTOR_MODE_P (modev3))
36489 op1 = safe_vector_operand (op1, modev3);
/* Copy operands into registers when the insn predicates reject them;
   op1 is additionally forced to a register when optimizing.  */
36491 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36492 op0 = copy_to_mode_reg (modev2, op0);
36493 if ((optimize && !register_operand (op1, modev3))
36494 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
36495 op1 = copy_to_mode_reg (modev3, op1);
/* The control operand must be an 8-bit immediate.  */
36497 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
36499 error ("the third argument must be an 8-bit immediate");
/* PCMPISTRI returns the index result; the mask result goes to a
   scratch register.  */
36503 if (d->code == IX86_BUILTIN_PCMPISTRI128)
36505 if (optimize || !target
36506 || GET_MODE (target) != tmode0
36507 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36508 target = gen_reg_rtx (tmode0);
36510 scratch1 = gen_reg_rtx (tmode1);
36512 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
/* PCMPISTRM returns the mask result; the index goes to a scratch.  */
36514 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
36516 if (optimize || !target
36517 || GET_MODE (target) != tmode1
36518 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36519 target = gen_reg_rtx (tmode1);
36521 scratch0 = gen_reg_rtx (tmode0);
36523 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Otherwise the builtin tests a flag bit: both insn results are
   discarded into scratches and the value is read from the flags
   register below.  d->flag must then identify the flag to extract.  */
36527 gcc_assert (d->flag);
36529 scratch0 = gen_reg_rtx (tmode0);
36530 scratch1 = gen_reg_rtx (tmode1);
36532 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
/* Materialize the requested condition (d->flag is cast to the CC
   register's machine mode below) as a 0/1 value: zero an SImode reg,
   then set only its low QImode part from the flags comparison.  */
36542 target = gen_reg_rtx (SImode);
36543 emit_move_insn (target, const0_rtx);
36544 target = gen_rtx_SUBREG (QImode, target, 0);
36547 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36548 gen_rtx_fmt_ee (EQ, QImode,
36549 gen_rtx_REG ((machine_mode) d->flag,
/* Return the full SImode register underlying the QImode subreg.  */
36552 return SUBREG_REG (target);
36558 /* Subroutine of ix86_expand_builtin to take care of insns with
36559 variable number of operands. */
/* D describes the builtin (insn code, function-type tag in d->flag,
   comparison code), EXP is the CALL_EXPR and TARGET is a suggested
   result rtx.  The big switch below classifies the builtin by its
   ix86_builtin_func_type: simple categories are delegated to helper
   expanders, the rest set nargs / nargs_constant / mask_pos /
   last_arg_count / rmode before falling through to the generic
   operand-expansion loop.  NOTE(review): this chunk is a sparse line
   extraction -- braces, break statements and the `nargs = N;`
   assignments inside the switch arms are not visible here.  */
36562 ix86_expand_args_builtin (const struct builtin_description *d,
36563 tree exp, rtx target)
36565 rtx pat, real_target;
36566 unsigned int i, nargs;
36567 unsigned int nargs_constant = 0;
36568 unsigned int mask_pos = 0;
36569 int num_memory = 0;
36575 bool last_arg_count = false;
36576 enum insn_code icode = d->icode;
36577 const struct insn_data_d *insn_p = &insn_data[icode];
36578 machine_mode tmode = insn_p->operand[0].mode;
36579 machine_mode rmode = VOIDmode;
36581 enum rtx_code comparison = d->comparison;
/* Classify the builtin by its prototype tag.  */
36583 switch ((enum ix86_builtin_func_type) d->flag)
36585 case V2DF_FTYPE_V2DF_ROUND:
36586 case V4DF_FTYPE_V4DF_ROUND:
36587 case V4SF_FTYPE_V4SF_ROUND:
36588 case V8SF_FTYPE_V8SF_ROUND:
36589 case V4SI_FTYPE_V4SF_ROUND:
36590 case V8SI_FTYPE_V8SF_ROUND:
36591 return ix86_expand_sse_round (d, exp, target);
36592 case V4SI_FTYPE_V2DF_V2DF_ROUND:
36593 case V8SI_FTYPE_V4DF_V4DF_ROUND:
36594 case V16SI_FTYPE_V8DF_V8DF_ROUND:
36595 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
36596 case INT_FTYPE_V8SF_V8SF_PTEST:
36597 case INT_FTYPE_V4DI_V4DI_PTEST:
36598 case INT_FTYPE_V4DF_V4DF_PTEST:
36599 case INT_FTYPE_V4SF_V4SF_PTEST:
36600 case INT_FTYPE_V2DI_V2DI_PTEST:
36601 case INT_FTYPE_V2DF_V2DF_PTEST:
36602 return ix86_expand_sse_ptest (d, exp, target);
/* Unary builtins (one input operand).  */
36603 case FLOAT128_FTYPE_FLOAT128:
36604 case FLOAT_FTYPE_FLOAT:
36605 case INT_FTYPE_INT:
36606 case UINT64_FTYPE_INT:
36607 case UINT16_FTYPE_UINT16:
36608 case INT64_FTYPE_INT64:
36609 case INT64_FTYPE_V4SF:
36610 case INT64_FTYPE_V2DF:
36611 case INT_FTYPE_V16QI:
36612 case INT_FTYPE_V8QI:
36613 case INT_FTYPE_V8SF:
36614 case INT_FTYPE_V4DF:
36615 case INT_FTYPE_V4SF:
36616 case INT_FTYPE_V2DF:
36617 case INT_FTYPE_V32QI:
36618 case V16QI_FTYPE_V16QI:
36619 case V8SI_FTYPE_V8SF:
36620 case V8SI_FTYPE_V4SI:
36621 case V8HI_FTYPE_V8HI:
36622 case V8HI_FTYPE_V16QI:
36623 case V8QI_FTYPE_V8QI:
36624 case V8SF_FTYPE_V8SF:
36625 case V8SF_FTYPE_V8SI:
36626 case V8SF_FTYPE_V4SF:
36627 case V8SF_FTYPE_V8HI:
36628 case V4SI_FTYPE_V4SI:
36629 case V4SI_FTYPE_V16QI:
36630 case V4SI_FTYPE_V4SF:
36631 case V4SI_FTYPE_V8SI:
36632 case V4SI_FTYPE_V8HI:
36633 case V4SI_FTYPE_V4DF:
36634 case V4SI_FTYPE_V2DF:
36635 case V4HI_FTYPE_V4HI:
36636 case V4DF_FTYPE_V4DF:
36637 case V4DF_FTYPE_V4SI:
36638 case V4DF_FTYPE_V4SF:
36639 case V4DF_FTYPE_V2DF:
36640 case V4SF_FTYPE_V4SF:
36641 case V4SF_FTYPE_V4SI:
36642 case V4SF_FTYPE_V8SF:
36643 case V4SF_FTYPE_V4DF:
36644 case V4SF_FTYPE_V8HI:
36645 case V4SF_FTYPE_V2DF:
36646 case V2DI_FTYPE_V2DI:
36647 case V2DI_FTYPE_V16QI:
36648 case V2DI_FTYPE_V8HI:
36649 case V2DI_FTYPE_V4SI:
36650 case V2DF_FTYPE_V2DF:
36651 case V2DF_FTYPE_V4SI:
36652 case V2DF_FTYPE_V4DF:
36653 case V2DF_FTYPE_V4SF:
36654 case V2DF_FTYPE_V2SI:
36655 case V2SI_FTYPE_V2SI:
36656 case V2SI_FTYPE_V4SF:
36657 case V2SI_FTYPE_V2SF:
36658 case V2SI_FTYPE_V2DF:
36659 case V2SF_FTYPE_V2SF:
36660 case V2SF_FTYPE_V2SI:
36661 case V32QI_FTYPE_V32QI:
36662 case V32QI_FTYPE_V16QI:
36663 case V16HI_FTYPE_V16HI:
36664 case V16HI_FTYPE_V8HI:
36665 case V8SI_FTYPE_V8SI:
36666 case V16HI_FTYPE_V16QI:
36667 case V8SI_FTYPE_V16QI:
36668 case V4DI_FTYPE_V16QI:
36669 case V8SI_FTYPE_V8HI:
36670 case V4DI_FTYPE_V8HI:
36671 case V4DI_FTYPE_V4SI:
36672 case V4DI_FTYPE_V2DI:
36674 case HI_FTYPE_V16QI:
36675 case SI_FTYPE_V32QI:
36676 case DI_FTYPE_V64QI:
36677 case V16QI_FTYPE_HI:
36678 case V32QI_FTYPE_SI:
36679 case V64QI_FTYPE_DI:
36680 case V8HI_FTYPE_QI:
36681 case V16HI_FTYPE_HI:
36682 case V32HI_FTYPE_SI:
36683 case V4SI_FTYPE_QI:
36684 case V8SI_FTYPE_QI:
36685 case V4SI_FTYPE_HI:
36686 case V8SI_FTYPE_HI:
36687 case QI_FTYPE_V8HI:
36688 case HI_FTYPE_V16HI:
36689 case SI_FTYPE_V32HI:
36690 case QI_FTYPE_V4SI:
36691 case QI_FTYPE_V8SI:
36692 case HI_FTYPE_V16SI:
36693 case QI_FTYPE_V2DI:
36694 case QI_FTYPE_V4DI:
36695 case QI_FTYPE_V8DI:
36696 case UINT_FTYPE_V2DF:
36697 case UINT_FTYPE_V4SF:
36698 case UINT64_FTYPE_V2DF:
36699 case UINT64_FTYPE_V4SF:
36700 case V16QI_FTYPE_V8DI:
36701 case V16HI_FTYPE_V16SI:
36702 case V16SI_FTYPE_HI:
36703 case V2DI_FTYPE_QI:
36704 case V4DI_FTYPE_QI:
36705 case V16SI_FTYPE_V16SI:
36706 case V16SI_FTYPE_INT:
36707 case V16SF_FTYPE_FLOAT:
36708 case V16SF_FTYPE_V8SF:
36709 case V16SI_FTYPE_V8SI:
36710 case V16SF_FTYPE_V4SF:
36711 case V16SI_FTYPE_V4SI:
36712 case V16SF_FTYPE_V16SF:
36713 case V8HI_FTYPE_V8DI:
36714 case V8UHI_FTYPE_V8UHI:
36715 case V8SI_FTYPE_V8DI:
36716 case V8SF_FTYPE_V8DF:
36717 case V8DI_FTYPE_QI:
36718 case V8DI_FTYPE_INT64:
36719 case V8DI_FTYPE_V4DI:
36720 case V8DI_FTYPE_V8DI:
36721 case V8DF_FTYPE_DOUBLE:
36722 case V8DF_FTYPE_V4DF:
36723 case V8DF_FTYPE_V2DF:
36724 case V8DF_FTYPE_V8DF:
36725 case V8DF_FTYPE_V8SI:
36728 case V4SF_FTYPE_V4SF_VEC_MERGE:
36729 case V2DF_FTYPE_V2DF_VEC_MERGE:
36730 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
/* Binary builtins; non-comparisons are delegated to the binop
   expander, comparisons fall through to generic handling.  */
36731 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
36732 case V16QI_FTYPE_V16QI_V16QI:
36733 case V16QI_FTYPE_V8HI_V8HI:
36734 case V16SI_FTYPE_V16SI_V16SI:
36735 case V16SF_FTYPE_V16SF_V16SF:
36736 case V16SF_FTYPE_V16SF_V16SI:
36737 case V8QI_FTYPE_V8QI_V8QI:
36738 case V8QI_FTYPE_V4HI_V4HI:
36739 case V8HI_FTYPE_V8HI_V8HI:
36740 case V8HI_FTYPE_V16QI_V16QI:
36741 case V8HI_FTYPE_V4SI_V4SI:
36742 case V8SF_FTYPE_V8SF_V8SF:
36743 case V8SF_FTYPE_V8SF_V8SI:
36744 case V8DI_FTYPE_V8DI_V8DI:
36745 case V8DF_FTYPE_V8DF_V8DF:
36746 case V8DF_FTYPE_V8DF_V8DI:
36747 case V4SI_FTYPE_V4SI_V4SI:
36748 case V4SI_FTYPE_V8HI_V8HI:
36749 case V4SI_FTYPE_V4SF_V4SF:
36750 case V4SI_FTYPE_V2DF_V2DF:
36751 case V4HI_FTYPE_V4HI_V4HI:
36752 case V4HI_FTYPE_V8QI_V8QI:
36753 case V4HI_FTYPE_V2SI_V2SI:
36754 case V4DF_FTYPE_V4DF_V4DF:
36755 case V4DF_FTYPE_V4DF_V4DI:
36756 case V4SF_FTYPE_V4SF_V4SF:
36757 case V4SF_FTYPE_V4SF_V4SI:
36758 case V4SF_FTYPE_V4SF_V2SI:
36759 case V4SF_FTYPE_V4SF_V2DF:
36760 case V4SF_FTYPE_V4SF_UINT:
36761 case V4SF_FTYPE_V4SF_UINT64:
36762 case V4SF_FTYPE_V4SF_DI:
36763 case V4SF_FTYPE_V4SF_SI:
36764 case V2DI_FTYPE_V2DI_V2DI:
36765 case V2DI_FTYPE_V16QI_V16QI:
36766 case V2DI_FTYPE_V4SI_V4SI:
36767 case V2UDI_FTYPE_V4USI_V4USI:
36768 case V2DI_FTYPE_V2DI_V16QI:
36769 case V2DI_FTYPE_V2DF_V2DF:
36770 case V2SI_FTYPE_V2SI_V2SI:
36771 case V2SI_FTYPE_V4HI_V4HI:
36772 case V2SI_FTYPE_V2SF_V2SF:
36773 case V2DF_FTYPE_V2DF_V2DF:
36774 case V2DF_FTYPE_V2DF_V4SF:
36775 case V2DF_FTYPE_V2DF_V2DI:
36776 case V2DF_FTYPE_V2DF_DI:
36777 case V2DF_FTYPE_V2DF_SI:
36778 case V2DF_FTYPE_V2DF_UINT:
36779 case V2DF_FTYPE_V2DF_UINT64:
36780 case V2SF_FTYPE_V2SF_V2SF:
36781 case V1DI_FTYPE_V1DI_V1DI:
36782 case V1DI_FTYPE_V8QI_V8QI:
36783 case V1DI_FTYPE_V2SI_V2SI:
36784 case V32QI_FTYPE_V16HI_V16HI:
36785 case V16HI_FTYPE_V8SI_V8SI:
36786 case V32QI_FTYPE_V32QI_V32QI:
36787 case V16HI_FTYPE_V32QI_V32QI:
36788 case V16HI_FTYPE_V16HI_V16HI:
36789 case V8SI_FTYPE_V4DF_V4DF:
36790 case V8SI_FTYPE_V8SI_V8SI:
36791 case V8SI_FTYPE_V16HI_V16HI:
36792 case V4DI_FTYPE_V4DI_V4DI:
36793 case V4DI_FTYPE_V8SI_V8SI:
36794 case V4UDI_FTYPE_V8USI_V8USI:
36795 case QI_FTYPE_V8DI_V8DI:
36796 case V8DI_FTYPE_V64QI_V64QI:
36797 case HI_FTYPE_V16SI_V16SI:
36798 if (comparison == UNKNOWN)
36799 return ix86_expand_binop_builtin (icode, exp, target);
/* Comparisons with swapped operands.  */
36802 case V4SF_FTYPE_V4SF_V4SF_SWAP:
36803 case V2DF_FTYPE_V2DF_V2DF_SWAP:
36804 gcc_assert (comparison != UNKNOWN);
/* Shift/rotate builtins whose last argument is a count.  */
36808 case V16HI_FTYPE_V16HI_V8HI_COUNT:
36809 case V16HI_FTYPE_V16HI_SI_COUNT:
36810 case V8SI_FTYPE_V8SI_V4SI_COUNT:
36811 case V8SI_FTYPE_V8SI_SI_COUNT:
36812 case V4DI_FTYPE_V4DI_V2DI_COUNT:
36813 case V4DI_FTYPE_V4DI_INT_COUNT:
36814 case V8HI_FTYPE_V8HI_V8HI_COUNT:
36815 case V8HI_FTYPE_V8HI_SI_COUNT:
36816 case V4SI_FTYPE_V4SI_V4SI_COUNT:
36817 case V4SI_FTYPE_V4SI_SI_COUNT:
36818 case V4HI_FTYPE_V4HI_V4HI_COUNT:
36819 case V4HI_FTYPE_V4HI_SI_COUNT:
36820 case V2DI_FTYPE_V2DI_V2DI_COUNT:
36821 case V2DI_FTYPE_V2DI_SI_COUNT:
36822 case V2SI_FTYPE_V2SI_V2SI_COUNT:
36823 case V2SI_FTYPE_V2SI_SI_COUNT:
36824 case V1DI_FTYPE_V1DI_V1DI_COUNT:
36825 case V1DI_FTYPE_V1DI_SI_COUNT:
36827 last_arg_count = true;
36829 case UINT64_FTYPE_UINT64_UINT64:
36830 case UINT_FTYPE_UINT_UINT:
36831 case UINT_FTYPE_UINT_USHORT:
36832 case UINT_FTYPE_UINT_UCHAR:
36833 case UINT16_FTYPE_UINT16_INT:
36834 case UINT8_FTYPE_UINT8_INT:
36835 case HI_FTYPE_HI_HI:
36836 case SI_FTYPE_SI_SI:
36837 case DI_FTYPE_DI_DI:
36838 case V16SI_FTYPE_V8DF_V8DF:
/* CONVERT variants: the insn works in rmode, the builtin's declared
   return mode differs; the elided lines also set rmode here.  */
36841 case V2DI_FTYPE_V2DI_INT_CONVERT:
36844 nargs_constant = 1;
36846 case V4DI_FTYPE_V4DI_INT_CONVERT:
36849 nargs_constant = 1;
36851 case V8DI_FTYPE_V8DI_INT_CONVERT:
36854 nargs_constant = 1;
/* Two operands, the last being an immediate.  */
36856 case V8HI_FTYPE_V8HI_INT:
36857 case V8HI_FTYPE_V8SF_INT:
36858 case V16HI_FTYPE_V16SF_INT:
36859 case V8HI_FTYPE_V4SF_INT:
36860 case V8SF_FTYPE_V8SF_INT:
36861 case V4SF_FTYPE_V16SF_INT:
36862 case V16SF_FTYPE_V16SF_INT:
36863 case V4SI_FTYPE_V4SI_INT:
36864 case V4SI_FTYPE_V8SI_INT:
36865 case V4HI_FTYPE_V4HI_INT:
36866 case V4DF_FTYPE_V4DF_INT:
36867 case V4DF_FTYPE_V8DF_INT:
36868 case V4SF_FTYPE_V4SF_INT:
36869 case V4SF_FTYPE_V8SF_INT:
36870 case V2DI_FTYPE_V2DI_INT:
36871 case V2DF_FTYPE_V2DF_INT:
36872 case V2DF_FTYPE_V4DF_INT:
36873 case V16HI_FTYPE_V16HI_INT:
36874 case V8SI_FTYPE_V8SI_INT:
36875 case V16SI_FTYPE_V16SI_INT:
36876 case V4SI_FTYPE_V16SI_INT:
36877 case V4DI_FTYPE_V4DI_INT:
36878 case V2DI_FTYPE_V4DI_INT:
36879 case V4DI_FTYPE_V8DI_INT:
36880 case HI_FTYPE_HI_INT:
36881 case QI_FTYPE_V4SF_INT:
36882 case QI_FTYPE_V2DF_INT:
36884 nargs_constant = 1;
/* Three-operand builtins (including AVX-512 masked forms).  */
36886 case V16QI_FTYPE_V16QI_V16QI_V16QI:
36887 case V8SF_FTYPE_V8SF_V8SF_V8SF:
36888 case V4DF_FTYPE_V4DF_V4DF_V4DF:
36889 case V4SF_FTYPE_V4SF_V4SF_V4SF:
36890 case V2DF_FTYPE_V2DF_V2DF_V2DF:
36891 case V32QI_FTYPE_V32QI_V32QI_V32QI:
36892 case HI_FTYPE_V16SI_V16SI_HI:
36893 case QI_FTYPE_V8DI_V8DI_QI:
36894 case V16HI_FTYPE_V16SI_V16HI_HI:
36895 case V16QI_FTYPE_V16SI_V16QI_HI:
36896 case V16QI_FTYPE_V8DI_V16QI_QI:
36897 case V16SF_FTYPE_V16SF_V16SF_HI:
36898 case V16SF_FTYPE_V16SF_V16SF_V16SF:
36899 case V16SF_FTYPE_V16SF_V16SI_V16SF:
36900 case V16SF_FTYPE_V16SI_V16SF_HI:
36901 case V16SF_FTYPE_V16SI_V16SF_V16SF:
36902 case V16SF_FTYPE_V4SF_V16SF_HI:
36903 case V16SI_FTYPE_SI_V16SI_HI:
36904 case V16SI_FTYPE_V16HI_V16SI_HI:
36905 case V16SI_FTYPE_V16QI_V16SI_HI:
36906 case V16SI_FTYPE_V16SF_V16SI_HI:
36907 case V8SF_FTYPE_V4SF_V8SF_QI:
36908 case V4DF_FTYPE_V2DF_V4DF_QI:
36909 case V8SI_FTYPE_V4SI_V8SI_QI:
36910 case V8SI_FTYPE_SI_V8SI_QI:
36911 case V4SI_FTYPE_V4SI_V4SI_QI:
36912 case V4SI_FTYPE_SI_V4SI_QI:
36913 case V4DI_FTYPE_V2DI_V4DI_QI:
36914 case V4DI_FTYPE_DI_V4DI_QI:
36915 case V2DI_FTYPE_V2DI_V2DI_QI:
36916 case V2DI_FTYPE_DI_V2DI_QI:
36917 case V64QI_FTYPE_V64QI_V64QI_DI:
36918 case V64QI_FTYPE_V16QI_V64QI_DI:
36919 case V64QI_FTYPE_QI_V64QI_DI:
36920 case V32QI_FTYPE_V32QI_V32QI_SI:
36921 case V32QI_FTYPE_V16QI_V32QI_SI:
36922 case V32QI_FTYPE_QI_V32QI_SI:
36923 case V16QI_FTYPE_V16QI_V16QI_HI:
36924 case V16QI_FTYPE_QI_V16QI_HI:
36925 case V32HI_FTYPE_V8HI_V32HI_SI:
36926 case V32HI_FTYPE_HI_V32HI_SI:
36927 case V16HI_FTYPE_V8HI_V16HI_HI:
36928 case V16HI_FTYPE_HI_V16HI_HI:
36929 case V8HI_FTYPE_V8HI_V8HI_QI:
36930 case V8HI_FTYPE_HI_V8HI_QI:
36931 case V8SF_FTYPE_V8HI_V8SF_QI:
36932 case V4SF_FTYPE_V8HI_V4SF_QI:
36933 case V8SI_FTYPE_V8SF_V8SI_QI:
36934 case V4SI_FTYPE_V4SF_V4SI_QI:
36935 case V8DI_FTYPE_V8SF_V8DI_QI:
36936 case V4DI_FTYPE_V4SF_V4DI_QI:
36937 case V2DI_FTYPE_V4SF_V2DI_QI:
36938 case V8SF_FTYPE_V8DI_V8SF_QI:
36939 case V4SF_FTYPE_V4DI_V4SF_QI:
36940 case V4SF_FTYPE_V2DI_V4SF_QI:
36941 case V8DF_FTYPE_V8DI_V8DF_QI:
36942 case V4DF_FTYPE_V4DI_V4DF_QI:
36943 case V2DF_FTYPE_V2DI_V2DF_QI:
36944 case V16QI_FTYPE_V8HI_V16QI_QI:
36945 case V16QI_FTYPE_V16HI_V16QI_HI:
36946 case V16QI_FTYPE_V4SI_V16QI_QI:
36947 case V16QI_FTYPE_V8SI_V16QI_QI:
36948 case V8HI_FTYPE_V4SI_V8HI_QI:
36949 case V8HI_FTYPE_V8SI_V8HI_QI:
36950 case V16QI_FTYPE_V2DI_V16QI_QI:
36951 case V16QI_FTYPE_V4DI_V16QI_QI:
36952 case V8HI_FTYPE_V2DI_V8HI_QI:
36953 case V8HI_FTYPE_V4DI_V8HI_QI:
36954 case V4SI_FTYPE_V2DI_V4SI_QI:
36955 case V4SI_FTYPE_V4DI_V4SI_QI:
36956 case V32QI_FTYPE_V32HI_V32QI_SI:
36957 case HI_FTYPE_V16QI_V16QI_HI:
36958 case SI_FTYPE_V32QI_V32QI_SI:
36959 case DI_FTYPE_V64QI_V64QI_DI:
36960 case QI_FTYPE_V8HI_V8HI_QI:
36961 case HI_FTYPE_V16HI_V16HI_HI:
36962 case SI_FTYPE_V32HI_V32HI_SI:
36963 case QI_FTYPE_V4SI_V4SI_QI:
36964 case QI_FTYPE_V8SI_V8SI_QI:
36965 case QI_FTYPE_V2DI_V2DI_QI:
36966 case QI_FTYPE_V4DI_V4DI_QI:
36967 case V4SF_FTYPE_V2DF_V4SF_QI:
36968 case V4SF_FTYPE_V4DF_V4SF_QI:
36969 case V16SI_FTYPE_V16SI_V16SI_HI:
36970 case V16SI_FTYPE_V16SI_V16SI_V16SI:
36971 case V16SI_FTYPE_V4SI_V16SI_HI:
36972 case V2DI_FTYPE_V2DI_V2DI_V2DI:
36973 case V2DI_FTYPE_V4SI_V2DI_QI:
36974 case V2DI_FTYPE_V8HI_V2DI_QI:
36975 case V2DI_FTYPE_V16QI_V2DI_QI:
36976 case V4DI_FTYPE_V4DI_V4DI_QI:
36977 case V4DI_FTYPE_V4SI_V4DI_QI:
36978 case V4DI_FTYPE_V8HI_V4DI_QI:
36979 case V4DI_FTYPE_V16QI_V4DI_QI:
36980 case V8DI_FTYPE_V8DF_V8DI_QI:
36981 case V4DI_FTYPE_V4DF_V4DI_QI:
36982 case V2DI_FTYPE_V2DF_V2DI_QI:
36983 case V4SI_FTYPE_V4DF_V4SI_QI:
36984 case V4SI_FTYPE_V2DF_V4SI_QI:
36985 case V4SI_FTYPE_V8HI_V4SI_QI:
36986 case V4SI_FTYPE_V16QI_V4SI_QI:
36987 case V8SI_FTYPE_V8SI_V8SI_V8SI:
36988 case V4DI_FTYPE_V4DI_V4DI_V4DI:
36989 case V8DF_FTYPE_V2DF_V8DF_QI:
36990 case V8DF_FTYPE_V4DF_V8DF_QI:
36991 case V8DF_FTYPE_V8DF_V8DF_QI:
36992 case V8DF_FTYPE_V8DF_V8DF_V8DF:
36993 case V8SF_FTYPE_V8SF_V8SF_QI:
36994 case V8SF_FTYPE_V8SI_V8SF_QI:
36995 case V4DF_FTYPE_V4DF_V4DF_QI:
36996 case V4SF_FTYPE_V4SF_V4SF_QI:
36997 case V2DF_FTYPE_V2DF_V2DF_QI:
36998 case V2DF_FTYPE_V4SF_V2DF_QI:
36999 case V2DF_FTYPE_V4SI_V2DF_QI:
37000 case V4SF_FTYPE_V4SI_V4SF_QI:
37001 case V4DF_FTYPE_V4SF_V4DF_QI:
37002 case V4DF_FTYPE_V4SI_V4DF_QI:
37003 case V8SI_FTYPE_V8SI_V8SI_QI:
37004 case V8SI_FTYPE_V8HI_V8SI_QI:
37005 case V8SI_FTYPE_V16QI_V8SI_QI:
37006 case V8DF_FTYPE_V8DF_V8DI_V8DF:
37007 case V8DF_FTYPE_V8DI_V8DF_V8DF:
37008 case V8DF_FTYPE_V8SF_V8DF_QI:
37009 case V8DF_FTYPE_V8SI_V8DF_QI:
37010 case V8DI_FTYPE_DI_V8DI_QI:
37011 case V16SF_FTYPE_V8SF_V16SF_HI:
37012 case V16SI_FTYPE_V8SI_V16SI_HI:
37013 case V16HI_FTYPE_V16HI_V16HI_HI:
37014 case V8HI_FTYPE_V16QI_V8HI_QI:
37015 case V16HI_FTYPE_V16QI_V16HI_HI:
37016 case V32HI_FTYPE_V32HI_V32HI_SI:
37017 case V32HI_FTYPE_V32QI_V32HI_SI:
37018 case V8DI_FTYPE_V16QI_V8DI_QI:
37019 case V8DI_FTYPE_V2DI_V8DI_QI:
37020 case V8DI_FTYPE_V4DI_V8DI_QI:
37021 case V8DI_FTYPE_V8DI_V8DI_QI:
37022 case V8DI_FTYPE_V8DI_V8DI_V8DI:
37023 case V8DI_FTYPE_V8HI_V8DI_QI:
37024 case V8DI_FTYPE_V8SI_V8DI_QI:
37025 case V8HI_FTYPE_V8DI_V8HI_QI:
37026 case V8SF_FTYPE_V8DF_V8SF_QI:
37027 case V8SI_FTYPE_V8DF_V8SI_QI:
37028 case V8SI_FTYPE_V8DI_V8SI_QI:
37029 case V4SI_FTYPE_V4SI_V4SI_V4SI:
/* Three operands, the last being an immediate.  */
37032 case V32QI_FTYPE_V32QI_V32QI_INT:
37033 case V16HI_FTYPE_V16HI_V16HI_INT:
37034 case V16QI_FTYPE_V16QI_V16QI_INT:
37035 case V4DI_FTYPE_V4DI_V4DI_INT:
37036 case V8HI_FTYPE_V8HI_V8HI_INT:
37037 case V8SI_FTYPE_V8SI_V8SI_INT:
37038 case V8SI_FTYPE_V8SI_V4SI_INT:
37039 case V8SF_FTYPE_V8SF_V8SF_INT:
37040 case V8SF_FTYPE_V8SF_V4SF_INT:
37041 case V4SI_FTYPE_V4SI_V4SI_INT:
37042 case V4DF_FTYPE_V4DF_V4DF_INT:
37043 case V16SF_FTYPE_V16SF_V16SF_INT:
37044 case V16SF_FTYPE_V16SF_V4SF_INT:
37045 case V16SI_FTYPE_V16SI_V4SI_INT:
37046 case V4DF_FTYPE_V4DF_V2DF_INT:
37047 case V4SF_FTYPE_V4SF_V4SF_INT:
37048 case V2DI_FTYPE_V2DI_V2DI_INT:
37049 case V4DI_FTYPE_V4DI_V2DI_INT:
37050 case V2DF_FTYPE_V2DF_V2DF_INT:
37051 case QI_FTYPE_V8DI_V8DI_INT:
37052 case QI_FTYPE_V8DF_V8DF_INT:
37053 case QI_FTYPE_V2DF_V2DF_INT:
37054 case QI_FTYPE_V4SF_V4SF_INT:
37055 case HI_FTYPE_V16SI_V16SI_INT:
37056 case HI_FTYPE_V16SF_V16SF_INT:
37058 nargs_constant = 1;
37060 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
37063 nargs_constant = 1;
37065 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
37068 nargs_constant = 1;
37070 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
37073 nargs_constant = 1;
37075 case V2DI_FTYPE_V2DI_UINT_UINT:
37077 nargs_constant = 2;
37079 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
37082 nargs_constant = 1;
37084 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT:
37088 nargs_constant = 1;
37090 case QI_FTYPE_V8DF_INT_QI:
37091 case QI_FTYPE_V4DF_INT_QI:
37092 case QI_FTYPE_V2DF_INT_QI:
37093 case HI_FTYPE_V16SF_INT_HI:
37094 case QI_FTYPE_V8SF_INT_QI:
37095 case QI_FTYPE_V4SF_INT_QI:
37098 nargs_constant = 1;
37100 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT:
37104 nargs_constant = 1;
37106 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT:
37110 nargs_constant = 1;
/* Four-operand builtins (masked three-operand forms).  */
37112 case V32QI_FTYPE_V32QI_V32QI_V32QI_SI:
37113 case V32HI_FTYPE_V32HI_V32HI_V32HI_SI:
37114 case V32HI_FTYPE_V64QI_V64QI_V32HI_SI:
37115 case V16SI_FTYPE_V32HI_V32HI_V16SI_HI:
37116 case V64QI_FTYPE_V64QI_V64QI_V64QI_DI:
37117 case V32HI_FTYPE_V32HI_V8HI_V32HI_SI:
37118 case V16HI_FTYPE_V16HI_V8HI_V16HI_HI:
37119 case V8SI_FTYPE_V8SI_V4SI_V8SI_QI:
37120 case V4DI_FTYPE_V4DI_V2DI_V4DI_QI:
37121 case V64QI_FTYPE_V32HI_V32HI_V64QI_DI:
37122 case V32QI_FTYPE_V16HI_V16HI_V32QI_SI:
37123 case V16QI_FTYPE_V8HI_V8HI_V16QI_HI:
37124 case V32HI_FTYPE_V16SI_V16SI_V32HI_SI:
37125 case V16HI_FTYPE_V8SI_V8SI_V16HI_HI:
37126 case V8HI_FTYPE_V4SI_V4SI_V8HI_QI:
37127 case V4DF_FTYPE_V4DF_V4DI_V4DF_QI:
37128 case V8SF_FTYPE_V8SF_V8SI_V8SF_QI:
37129 case V4SF_FTYPE_V4SF_V4SI_V4SF_QI:
37130 case V2DF_FTYPE_V2DF_V2DI_V2DF_QI:
37131 case V2DI_FTYPE_V4SI_V4SI_V2DI_QI:
37132 case V4DI_FTYPE_V8SI_V8SI_V4DI_QI:
37133 case V4DF_FTYPE_V4DI_V4DF_V4DF_QI:
37134 case V8SF_FTYPE_V8SI_V8SF_V8SF_QI:
37135 case V2DF_FTYPE_V2DI_V2DF_V2DF_QI:
37136 case V4SF_FTYPE_V4SI_V4SF_V4SF_QI:
37137 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
37138 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
37139 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
37140 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
37141 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
37142 case V8HI_FTYPE_V8HI_V8HI_V8HI_QI:
37143 case V8SI_FTYPE_V8SI_V8SI_V8SI_QI:
37144 case V4SI_FTYPE_V4SI_V4SI_V4SI_QI:
37145 case V8SF_FTYPE_V8SF_V8SF_V8SF_QI:
37146 case V16QI_FTYPE_V16QI_V16QI_V16QI_HI:
37147 case V16HI_FTYPE_V16HI_V16HI_V16HI_HI:
37148 case V2DI_FTYPE_V2DI_V2DI_V2DI_QI:
37149 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
37150 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
37151 case V4DI_FTYPE_V4DI_V4DI_V4DI_QI:
37152 case V4DF_FTYPE_V4DF_V4DF_V4DF_QI:
37153 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
37154 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
37155 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
37156 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
37157 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
37158 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
37159 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
37160 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
37161 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
37162 case V8HI_FTYPE_V16QI_V16QI_V8HI_QI:
37163 case V16HI_FTYPE_V32QI_V32QI_V16HI_HI:
37164 case V8SI_FTYPE_V16HI_V16HI_V8SI_QI:
37165 case V4SI_FTYPE_V8HI_V8HI_V4SI_QI:
37168 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
37169 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
37170 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
37171 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
37172 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
37174 nargs_constant = 1;
37176 case QI_FTYPE_V4DI_V4DI_INT_QI:
37177 case QI_FTYPE_V8SI_V8SI_INT_QI:
37178 case QI_FTYPE_V4DF_V4DF_INT_QI:
37179 case QI_FTYPE_V8SF_V8SF_INT_QI:
37180 case QI_FTYPE_V2DI_V2DI_INT_QI:
37181 case QI_FTYPE_V4SI_V4SI_INT_QI:
37182 case QI_FTYPE_V2DF_V2DF_INT_QI:
37183 case QI_FTYPE_V4SF_V4SF_INT_QI:
37184 case DI_FTYPE_V64QI_V64QI_INT_DI:
37185 case SI_FTYPE_V32QI_V32QI_INT_SI:
37186 case HI_FTYPE_V16QI_V16QI_INT_HI:
37187 case SI_FTYPE_V32HI_V32HI_INT_SI:
37188 case HI_FTYPE_V16HI_V16HI_INT_HI:
37189 case QI_FTYPE_V8HI_V8HI_INT_QI:
37192 nargs_constant = 1;
37194 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
37196 nargs_constant = 2;
37198 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
37199 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
37202 case QI_FTYPE_V8DI_V8DI_INT_QI:
37203 case HI_FTYPE_V16SI_V16SI_INT_HI:
37204 case QI_FTYPE_V8DF_V8DF_INT_QI:
37205 case HI_FTYPE_V16SF_V16SF_INT_HI:
37208 nargs_constant = 1;
/* Masked forms with an immediate in the middle.  */
37210 case V8SF_FTYPE_V8SF_INT_V8SF_QI:
37211 case V4SF_FTYPE_V4SF_INT_V4SF_QI:
37212 case V2DF_FTYPE_V4DF_INT_V2DF_QI:
37213 case V2DI_FTYPE_V4DI_INT_V2DI_QI:
37214 case V8SF_FTYPE_V16SF_INT_V8SF_QI:
37215 case V8SI_FTYPE_V16SI_INT_V8SI_QI:
37216 case V2DF_FTYPE_V8DF_INT_V2DF_QI:
37217 case V2DI_FTYPE_V8DI_INT_V2DI_QI:
37218 case V4SF_FTYPE_V8SF_INT_V4SF_QI:
37219 case V4SI_FTYPE_V8SI_INT_V4SI_QI:
37220 case V8HI_FTYPE_V8SF_INT_V8HI_QI:
37221 case V8HI_FTYPE_V4SF_INT_V8HI_QI:
37222 case V32HI_FTYPE_V32HI_INT_V32HI_SI:
37223 case V16HI_FTYPE_V16HI_INT_V16HI_HI:
37224 case V8HI_FTYPE_V8HI_INT_V8HI_QI:
37225 case V4DI_FTYPE_V4DI_INT_V4DI_QI:
37226 case V2DI_FTYPE_V2DI_INT_V2DI_QI:
37227 case V8SI_FTYPE_V8SI_INT_V8SI_QI:
37228 case V4SI_FTYPE_V4SI_INT_V4SI_QI:
37229 case V4DF_FTYPE_V4DF_INT_V4DF_QI:
37230 case V2DF_FTYPE_V2DF_INT_V2DF_QI:
37231 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
37232 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
37233 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
37234 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
37235 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
37236 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
37237 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
37238 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
37239 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
37242 nargs_constant = 1;
37244 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
37245 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
37246 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
37247 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
37248 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
37249 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
37250 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
37251 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
37252 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
37253 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
37254 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI:
37255 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI:
37256 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI:
37257 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI:
37258 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI:
37259 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI:
37260 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI:
37261 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI:
37262 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI:
37263 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI:
37264 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI:
37265 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI:
37266 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI:
37267 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI:
37268 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI:
37269 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI:
37270 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI:
37273 nargs_constant = 1;
37275 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
37276 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
37277 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
37278 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
37279 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
37280 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI:
37281 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI:
37282 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI:
37283 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI:
37284 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI:
37285 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI:
37289 nargs_constant = 1;
37293 gcc_unreachable ();
/* The (elided) switch arms above also set nargs; it must fit the
   local args[] array (declaration not visible in this extraction).  */
37296 gcc_assert (nargs <= ARRAY_SIZE (args));
/* Comparison builtins are two-operand and handled separately.  */
37298 if (comparison != UNKNOWN)
37300 gcc_assert (nargs == 2);
37301 return ix86_expand_sse_compare (d, exp, target, swap);
/* Pick the result register.  When rmode differs from tmode the insn
   computes in tmode and the builtin's value is a paradoxical subreg.  */
37304 if (rmode == VOIDmode || rmode == tmode)
37308 || GET_MODE (target) != tmode
37309 || !insn_p->operand[0].predicate (target, tmode))
37310 target = gen_reg_rtx (tmode);
37311 real_target = target;
37315 real_target = gen_reg_rtx (tmode);
37316 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
/* Expand and legitimize each call argument against the corresponding
   insn operand (operand 0 is the result, hence i + 1).  */
37319 for (i = 0; i < nargs; i++)
37321 tree arg = CALL_EXPR_ARG (exp, i);
37322 rtx op = expand_normal (arg);
37323 machine_mode mode = insn_p->operand[i + 1].mode;
37324 bool match = insn_p->operand[i + 1].predicate (op, mode);
37326 if (last_arg_count && (i + 1) == nargs)
37328 /* SIMD shift insns take either an 8-bit immediate or
37329 register as count. But builtin functions take int as
37330 count. If count doesn't match, we put it in register. */
37333 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0)
37334 if (!insn_p->operand[i + 1].predicate (op, mode))
37335 op = copy_to_reg (op);
/* Arguments in the nargs_constant tail (adjusted by mask_pos for
   masked builtins) must be immediates; diagnose per insn the exact
   bit-width the pattern accepts.  */
37338 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37339 (!mask_pos && (nargs - i) <= nargs_constant))
37344 case CODE_FOR_avx_vinsertf128v4di:
37345 case CODE_FOR_avx_vextractf128v4di:
37346 error ("the last argument must be an 1-bit immediate");
37349 case CODE_FOR_avx512f_cmpv8di3_mask:
37350 case CODE_FOR_avx512f_cmpv16si3_mask:
37351 case CODE_FOR_avx512f_ucmpv8di3_mask:
37352 case CODE_FOR_avx512f_ucmpv16si3_mask:
37353 case CODE_FOR_avx512vl_cmpv4di3_mask:
37354 case CODE_FOR_avx512vl_cmpv8si3_mask:
37355 case CODE_FOR_avx512vl_ucmpv4di3_mask:
37356 case CODE_FOR_avx512vl_ucmpv8si3_mask:
37357 case CODE_FOR_avx512vl_cmpv2di3_mask:
37358 case CODE_FOR_avx512vl_cmpv4si3_mask:
37359 case CODE_FOR_avx512vl_ucmpv2di3_mask:
37360 case CODE_FOR_avx512vl_ucmpv4si3_mask:
37361 error ("the last argument must be a 3-bit immediate");
37364 case CODE_FOR_sse4_1_roundsd:
37365 case CODE_FOR_sse4_1_roundss:
37367 case CODE_FOR_sse4_1_roundpd:
37368 case CODE_FOR_sse4_1_roundps:
37369 case CODE_FOR_avx_roundpd256:
37370 case CODE_FOR_avx_roundps256:
37372 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
37373 case CODE_FOR_sse4_1_roundps_sfix:
37374 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
37375 case CODE_FOR_avx_roundps_sfix256:
37377 case CODE_FOR_sse4_1_blendps:
37378 case CODE_FOR_avx_blendpd256:
37379 case CODE_FOR_avx_vpermilv4df:
37380 case CODE_FOR_avx_vpermilv4df_mask:
37381 case CODE_FOR_avx512f_getmantv8df_mask:
37382 case CODE_FOR_avx512f_getmantv16sf_mask:
37383 case CODE_FOR_avx512vl_getmantv8sf_mask:
37384 case CODE_FOR_avx512vl_getmantv4df_mask:
37385 case CODE_FOR_avx512vl_getmantv4sf_mask:
37386 case CODE_FOR_avx512vl_getmantv2df_mask:
37387 case CODE_FOR_avx512dq_rangepv8df_mask_round:
37388 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
37389 case CODE_FOR_avx512dq_rangepv4df_mask:
37390 case CODE_FOR_avx512dq_rangepv8sf_mask:
37391 case CODE_FOR_avx512dq_rangepv2df_mask:
37392 case CODE_FOR_avx512dq_rangepv4sf_mask:
37393 case CODE_FOR_avx_shufpd256_mask:
37394 error ("the last argument must be a 4-bit immediate");
37397 case CODE_FOR_sha1rnds4:
37398 case CODE_FOR_sse4_1_blendpd:
37399 case CODE_FOR_avx_vpermilv2df:
37400 case CODE_FOR_avx_vpermilv2df_mask:
37401 case CODE_FOR_xop_vpermil2v2df3:
37402 case CODE_FOR_xop_vpermil2v4sf3:
37403 case CODE_FOR_xop_vpermil2v4df3:
37404 case CODE_FOR_xop_vpermil2v8sf3:
37405 case CODE_FOR_avx512f_vinsertf32x4_mask:
37406 case CODE_FOR_avx512f_vinserti32x4_mask:
37407 case CODE_FOR_avx512f_vextractf32x4_mask:
37408 case CODE_FOR_avx512f_vextracti32x4_mask:
37409 case CODE_FOR_sse2_shufpd:
37410 case CODE_FOR_sse2_shufpd_mask:
37411 case CODE_FOR_avx512dq_shuf_f64x2_mask:
37412 case CODE_FOR_avx512dq_shuf_i64x2_mask:
37413 case CODE_FOR_avx512vl_shuf_i32x4_mask:
37414 case CODE_FOR_avx512vl_shuf_f32x4_mask:
37415 error ("the last argument must be a 2-bit immediate");
37418 case CODE_FOR_avx_vextractf128v4df:
37419 case CODE_FOR_avx_vextractf128v8sf:
37420 case CODE_FOR_avx_vextractf128v8si:
37421 case CODE_FOR_avx_vinsertf128v4df:
37422 case CODE_FOR_avx_vinsertf128v8sf:
37423 case CODE_FOR_avx_vinsertf128v8si:
37424 case CODE_FOR_avx512f_vinsertf64x4_mask:
37425 case CODE_FOR_avx512f_vinserti64x4_mask:
37426 case CODE_FOR_avx512f_vextractf64x4_mask:
37427 case CODE_FOR_avx512f_vextracti64x4_mask:
37428 case CODE_FOR_avx512dq_vinsertf32x8_mask:
37429 case CODE_FOR_avx512dq_vinserti32x8_mask:
37430 case CODE_FOR_avx512vl_vinsertv4df:
37431 case CODE_FOR_avx512vl_vinsertv4di:
37432 case CODE_FOR_avx512vl_vinsertv8sf:
37433 case CODE_FOR_avx512vl_vinsertv8si:
37434 error ("the last argument must be a 1-bit immediate");
37437 case CODE_FOR_avx_vmcmpv2df3:
37438 case CODE_FOR_avx_vmcmpv4sf3:
37439 case CODE_FOR_avx_cmpv2df3:
37440 case CODE_FOR_avx_cmpv4sf3:
37441 case CODE_FOR_avx_cmpv4df3:
37442 case CODE_FOR_avx_cmpv8sf3:
37443 case CODE_FOR_avx512f_cmpv8df3_mask:
37444 case CODE_FOR_avx512f_cmpv16sf3_mask:
37445 case CODE_FOR_avx512f_vmcmpv2df3_mask:
37446 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
37447 error ("the last argument must be a 5-bit immediate");
/* Default diagnostic: full 8-bit immediate expected.  */
37451 switch (nargs_constant)
37454 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37455 (!mask_pos && (nargs - i) == nargs_constant))
37457 error ("the next to last argument must be an 8-bit immediate");
37461 error ("the last argument must be an 8-bit immediate");
37464 gcc_unreachable ();
/* Plain operand: legitimize MEMs/wrong-mode values into registers as
   needed; when not optimizing, allow at most one memory operand.  */
37471 if (VECTOR_MODE_P (mode))
37472 op = safe_vector_operand (op, mode);
37474 /* If we aren't optimizing, only allow one memory operand to
37476 if (memory_operand (op, mode))
37479 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37481 if (optimize || !match || num_memory > 1)
37482 op = copy_to_mode_reg (mode, op);
37486 op = copy_to_reg (op);
37487 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37492 args[i].mode = mode;
/* Emit the insn, dispatching on the number of operands.  */
37498 pat = GEN_FCN (icode) (real_target, args[0].op);
37501 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
37504 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37508 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37509 args[2].op, args[3].op);
37512 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37513 args[2].op, args[3].op, args[4].op);
37515 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37516 args[2].op, args[3].op, args[4].op,
37520 gcc_unreachable ();
37530 /* Transform pattern of following layout:
37533 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
37541 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
37545 (parallel [ A B ... ]) */
/* Strip the UNSPEC_EMBEDDED_ROUNDING element out of a PARALLEL insn
   pattern, returning a fresh PARALLEL with the remaining elements.  */
37548 ix86_erase_embedded_rounding (rtx pat)
/* Accept either a full INSN or a bare pattern.  */
37550 if (GET_CODE (pat) == INSN)
37551 pat = PATTERN (pat);
37553 gcc_assert (GET_CODE (pat) == PARALLEL);
/* Fast path: a two-element PARALLEL must be (SET, rounding-UNSPEC);
   only the SET survives.  */
37555 if (XVECLEN (pat, 0) == 2)
37557 rtx p0 = XVECEXP (pat, 0, 0);
37558 rtx p1 = XVECEXP (pat, 0, 1);
37560 gcc_assert (GET_CODE (p0) == SET
37561 && GET_CODE (p1) == UNSPEC
37562 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
/* General case: copy every element except the embedded-rounding UNSPEC
   into a scratch vector, then rebuild the PARALLEL from it.  */
37568 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
37572 for (; i < XVECLEN (pat, 0); ++i)
37574 rtx elem = XVECEXP (pat, 0, i);
37575 if (GET_CODE (elem) != UNSPEC
37576 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
37580 /* No more than 1 occurrence was removed.  */
37581 gcc_assert (j >= XVECLEN (pat, 0) - 1)
37583 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
37587 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
/* Expands __builtin_ia32_*comi* round builtins: args are the two vector
   operands, a comparison selector (0..31, see avxintrin.h) and a rounding
   immediate.  Returns a QImode-backed SImode register holding 0/1.  */
37590 ix86_expand_sse_comi_round (const struct builtin_description *d,
37591 tree exp, rtx target)
37594 tree arg0 = CALL_EXPR_ARG (exp, 0);
37595 tree arg1 = CALL_EXPR_ARG (exp, 1);
37596 tree arg2 = CALL_EXPR_ARG (exp, 2);
37597 tree arg3 = CALL_EXPR_ARG (exp, 3);
37598 rtx op0 = expand_normal (arg0);
37599 rtx op1 = expand_normal (arg1);
37600 rtx op2 = expand_normal (arg2);
37601 rtx op3 = expand_normal (arg3);
37602 enum insn_code icode = d->icode;
37603 const struct insn_data_d *insn_p = &insn_data[icode];
37604 machine_mode mode0 = insn_p->operand[0].mode;
37605 machine_mode mode1 = insn_p->operand[1].mode;
37606 enum rtx_code comparison = UNEQ;
37607 bool need_ucomi = false;
37609 /* See avxintrin.h for values.  */
/* Maps the 5-bit comparison selector to the RTL comparison code ...  */
37610 enum rtx_code comi_comparisons[32] =
37612 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
37613 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
37614 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
/* ... and whether that selector needs the non-signalling UCOMI form.  */
37616 bool need_ucomi_values[32] =
37618 true, false, false, true, true, false, false, true,
37619 true, false, false, true, true, false, false, true,
37620 false, true, true, false, false, true, true, false,
37621 false, true, true, false, false, true, true, false
/* Validate the comparison selector and the rounding operand.
   NOTE(review): the diagnostics below contain typos ("incorect",
   missing article in "comparison constant"); fixing them would be a
   change to string literals, i.e. a code change.  */
37624 if (!CONST_INT_P (op2))
37626 error ("the third argument must be comparison constant");
37629 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
37631 error ("incorect comparison mode");
37635 if (!insn_p->operand[2].predicate (op3, SImode))
37637 error ("incorrect rounding operand");
37641 comparison = comi_comparisons[INTVAL (op2)];
37642 need_ucomi = need_ucomi_values[INTVAL (op2)];
37644 if (VECTOR_MODE_P (mode0))
37645 op0 = safe_vector_operand (op0, mode0);
37646 if (VECTOR_MODE_P (mode1))
37647 op1 = safe_vector_operand (op1, mode1);
/* Result register: a zeroed SImode reg, written through a QImode subreg
   so only the low byte is set by the comparison below.  */
37649 target = gen_reg_rtx (SImode);
37650 emit_move_insn (target, const0_rtx);
37651 target = gen_rtx_SUBREG (QImode, target, 0);
37653 if ((optimize && !register_operand (op0, mode0))
37654 || !insn_p->operand[0].predicate (op0, mode0))
37655 op0 = copy_to_mode_reg (mode0, op0);
37656 if ((optimize && !register_operand (op1, mode1))
37657 || !insn_p->operand[1].predicate (op1, mode1))
37658 op1 = copy_to_mode_reg (mode1, op1);
/* Switch from the COMI to the corresponding UCOMI icode when the
   selected predicate is non-signalling.  */
37661 icode = icode == CODE_FOR_sse_comi_round
37662 ? CODE_FOR_sse_ucomi_round
37663 : CODE_FOR_sse2_ucomi_round;
37665 pat = GEN_FCN (icode) (op0, op1, op3);
37669 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point.  */
37670 if (INTVAL (op3) == NO_ROUND)
37672 pat = ix86_erase_embedded_rounding (pat);
37676 set_dst = SET_DEST (pat);
37680 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
37681 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
/* Store the comparison outcome into the low byte of the result.  */
37685 emit_insn (gen_rtx_SET (VOIDmode,
37686 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
37687 gen_rtx_fmt_ee (comparison, QImode,
37691 return SUBREG_REG (target);
/* Expand a builtin with an embedded-rounding operand.  D describes the
   builtin, EXP is the call expression, TARGET is a suggested result rtx.
   The function-type switch below determines the argument count and how
   many trailing arguments must be immediates.  */
37695 ix86_expand_round_builtin (const struct builtin_description *d,
37696 tree exp, rtx target)
37699 unsigned int i, nargs;
37705 enum insn_code icode = d->icode;
37706 const struct insn_data_d *insn_p = &insn_data[icode];
37707 machine_mode tmode = insn_p->operand[0].mode;
37708 unsigned int nargs_constant = 0;
/* Set when the rounding immediate is NO_ROUND, in which case the
   embedded-rounding UNSPEC is stripped from the pattern at the end.  */
37709 unsigned int redundant_embed_rnd = 0;
37711 switch ((enum ix86_builtin_func_type) d->flag)
37713 case UINT64_FTYPE_V2DF_INT:
37714 case UINT64_FTYPE_V4SF_INT:
37715 case UINT_FTYPE_V2DF_INT:
37716 case UINT_FTYPE_V4SF_INT:
37717 case INT64_FTYPE_V2DF_INT:
37718 case INT64_FTYPE_V4SF_INT:
37719 case INT_FTYPE_V2DF_INT:
37720 case INT_FTYPE_V4SF_INT:
37723 case V4SF_FTYPE_V4SF_UINT_INT:
37724 case V4SF_FTYPE_V4SF_UINT64_INT:
37725 case V2DF_FTYPE_V2DF_UINT64_INT:
37726 case V4SF_FTYPE_V4SF_INT_INT:
37727 case V4SF_FTYPE_V4SF_INT64_INT:
37728 case V2DF_FTYPE_V2DF_INT64_INT:
37729 case V4SF_FTYPE_V4SF_V4SF_INT:
37730 case V2DF_FTYPE_V2DF_V2DF_INT:
37731 case V4SF_FTYPE_V4SF_V2DF_INT:
37732 case V2DF_FTYPE_V2DF_V4SF_INT:
37735 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
37736 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
37737 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
37738 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
37739 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
37740 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
37741 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
37742 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
37743 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
37744 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
37745 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
37746 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
37747 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
37748 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
37751 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
37752 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
37753 nargs_constant = 2;
/* COMI-style comparisons are handled by a dedicated helper.  */
37756 case INT_FTYPE_V4SF_V4SF_INT_INT:
37757 case INT_FTYPE_V2DF_V2DF_INT_INT:
37758 return ix86_expand_sse_comi_round (d, exp, target);
37759 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
37760 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
37761 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
37762 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
37763 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
37764 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
37767 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
37768 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
37769 nargs_constant = 4;
37772 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
37773 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
37774 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
37775 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
37776 nargs_constant = 3;
37779 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
37780 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
37781 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
37782 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
37784 nargs_constant = 4;
37786 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
37787 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
37788 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
37789 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
37791 nargs_constant = 3;
37794 gcc_unreachable ();
37796 gcc_assert (nargs <= ARRAY_SIZE (args));
/* Make sure TARGET is a fresh register of the right mode if the caller's
   suggestion is unusable.  */
37800 || GET_MODE (target) != tmode
37801 || !insn_p->operand[0].predicate (target, tmode)
37802 target = gen_reg_rtx (tmode);
/* Expand each argument, validating the immediate and rounding operands
   and forcing the rest into operands the insn accepts.  */
37804 for (i = 0; i < nargs; i++)
37806 tree arg = CALL_EXPR_ARG (exp, i);
37807 rtx op = expand_normal (arg);
37808 machine_mode mode = insn_p->operand[i + 1].mode;
37809 bool match = insn_p->operand[i + 1].predicate (op, mode);
/* This is the position of the required immediate argument; the icode
   determines how wide the immediate may be.  */
37811 if (i == nargs - nargs_constant)
37817 case CODE_FOR_avx512f_getmantv8df_mask_round:
37818 case CODE_FOR_avx512f_getmantv16sf_mask_round:
37819 case CODE_FOR_avx512f_vgetmantv2df_round:
37820 case CODE_FOR_avx512f_vgetmantv4sf_round:
37821 error ("the immediate argument must be a 4-bit immediate");
37823 case CODE_FOR_avx512f_cmpv8df3_mask_round:
37824 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
37825 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
37826 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
37827 error ("the immediate argument must be a 5-bit immediate");
37830 error ("the immediate argument must be an 8-bit immediate");
/* The last argument is the rounding-mode immediate.  */
37835 else if (i == nargs-1)
37837 if (!insn_p->operand[nargs].predicate (op, SImode))
37839 error ("incorrect rounding operand");
37843 /* If there is no rounding use normal version of the pattern.  */
37844 if (INTVAL (op) == NO_ROUND)
37845 redundant_embed_rnd = 1;
37849 if (VECTOR_MODE_P (mode))
37850 op = safe_vector_operand (op, mode);
37852 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37854 if (optimize || !match)
37855 op = copy_to_mode_reg (mode, op);
/* Mode mismatch: copy to a register and reinterpret via a subreg.  */
37859 op = copy_to_reg (op);
37860 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37865 args[i].mode = mode;
/* Emit the insn with the arity determined above.  */
37871 pat = GEN_FCN (icode) (target, args[0].op);
37874 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
37877 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37881 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37882 args[2].op, args[3].op);
37885 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37886 args[2].op, args[3].op, args[4].op);
37888 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37889 args[2].op, args[3].op, args[4].op,
37893 gcc_unreachable ();
/* Drop the embedded-rounding UNSPEC when rounding was NO_ROUND.  */
37899 if (redundant_embed_rnd)
37900 pat = ix86_erase_embedded_rounding (pat);
37906 /* Subroutine of ix86_expand_builtin to take care of special insns
37907 with variable number of operands.  */
/* Handles load/store-style builtins: the function-type switch classifies
   each builtin as a load or a store, records which operand is the memory
   operand, and notes whether the memory must be strictly aligned.  */
37910 ix86_expand_special_args_builtin (const struct builtin_description *d,
37911 tree exp, rtx target)
37915 unsigned int i, nargs, arg_adjust, memory;
/* True for the movnt/masked load/store icodes that require the memory
   operand to have full mode alignment.  */
37916 bool aligned_mem = false;
37922 enum insn_code icode = d->icode;
37923 bool last_arg_constant = false;
37924 const struct insn_data_d *insn_p = &insn_data[icode];
37925 machine_mode tmode = insn_p->operand[0].mode;
37926 enum { load, store } klass;
37928 switch ((enum ix86_builtin_func_type) d->flag)
37930 case VOID_FTYPE_VOID:
37931 emit_insn (GEN_FCN (icode) (target));
37933 case VOID_FTYPE_UINT64:
37934 case VOID_FTYPE_UNSIGNED:
37940 case INT_FTYPE_VOID:
37941 case USHORT_FTYPE_VOID:
37942 case UINT64_FTYPE_VOID:
37943 case UNSIGNED_FTYPE_VOID:
37948 case UINT64_FTYPE_PUNSIGNED:
37949 case V2DI_FTYPE_PV2DI:
37950 case V4DI_FTYPE_PV4DI:
37951 case V32QI_FTYPE_PCCHAR:
37952 case V16QI_FTYPE_PCCHAR:
37953 case V8SF_FTYPE_PCV4SF:
37954 case V8SF_FTYPE_PCFLOAT:
37955 case V4SF_FTYPE_PCFLOAT:
37956 case V4DF_FTYPE_PCV2DF:
37957 case V4DF_FTYPE_PCDOUBLE:
37958 case V2DF_FTYPE_PCDOUBLE:
37959 case VOID_FTYPE_PVOID:
37960 case V16SI_FTYPE_PV4SI:
37961 case V16SF_FTYPE_PV4SF:
37962 case V8DI_FTYPE_PV4DI:
37963 case V8DI_FTYPE_PV8DI:
37964 case V8DF_FTYPE_PV4DF:
/* Non-temporal loads require aligned memory.  */
37970 case CODE_FOR_sse4_1_movntdqa:
37971 case CODE_FOR_avx2_movntdqa:
37972 case CODE_FOR_avx512f_movntdqa:
37973 aligned_mem = true;
37979 case VOID_FTYPE_PV2SF_V4SF:
37980 case VOID_FTYPE_PV8DI_V8DI:
37981 case VOID_FTYPE_PV4DI_V4DI:
37982 case VOID_FTYPE_PV2DI_V2DI:
37983 case VOID_FTYPE_PCHAR_V32QI:
37984 case VOID_FTYPE_PCHAR_V16QI:
37985 case VOID_FTYPE_PFLOAT_V16SF:
37986 case VOID_FTYPE_PFLOAT_V8SF:
37987 case VOID_FTYPE_PFLOAT_V4SF:
37988 case VOID_FTYPE_PDOUBLE_V8DF:
37989 case VOID_FTYPE_PDOUBLE_V4DF:
37990 case VOID_FTYPE_PDOUBLE_V2DF:
37991 case VOID_FTYPE_PLONGLONG_LONGLONG:
37992 case VOID_FTYPE_PULONGLONG_ULONGLONG:
37993 case VOID_FTYPE_PINT_INT:
37996 /* Reserve memory operand for target.  */
37997 memory = ARRAY_SIZE (args);
38000 /* These builtins and instructions require the memory
38001 to be properly aligned.  */
38002 case CODE_FOR_avx_movntv4di:
38003 case CODE_FOR_sse2_movntv2di:
38004 case CODE_FOR_avx_movntv8sf:
38005 case CODE_FOR_sse_movntv4sf:
38006 case CODE_FOR_sse4a_vmmovntv4sf:
38007 case CODE_FOR_avx_movntv4df:
38008 case CODE_FOR_sse2_movntv2df:
38009 case CODE_FOR_sse4a_vmmovntv2df:
38010 case CODE_FOR_sse2_movntidi:
38011 case CODE_FOR_sse_movntq:
38012 case CODE_FOR_sse2_movntisi:
38013 case CODE_FOR_avx512f_movntv16sf:
38014 case CODE_FOR_avx512f_movntv8df:
38015 case CODE_FOR_avx512f_movntv8di:
38016 aligned_mem = true;
38022 case V4SF_FTYPE_V4SF_PCV2SF:
38023 case V2DF_FTYPE_V2DF_PCDOUBLE:
/* Masked gather-style loads: pointer plus index/mask vector.  */
38028 case V8SF_FTYPE_PCV8SF_V8SI:
38029 case V4DF_FTYPE_PCV4DF_V4DI:
38030 case V4SF_FTYPE_PCV4SF_V4SI:
38031 case V2DF_FTYPE_PCV2DF_V2DI:
38032 case V8SI_FTYPE_PCV8SI_V8SI:
38033 case V4DI_FTYPE_PCV4DI_V4DI:
38034 case V4SI_FTYPE_PCV4SI_V4SI:
38035 case V2DI_FTYPE_PCV2DI_V2DI:
/* Masked stores.  */
38040 case VOID_FTYPE_PV8DF_V8DF_QI:
38041 case VOID_FTYPE_PV16SF_V16SF_HI:
38042 case VOID_FTYPE_PV8DI_V8DI_QI:
38043 case VOID_FTYPE_PV4DI_V4DI_QI:
38044 case VOID_FTYPE_PV2DI_V2DI_QI:
38045 case VOID_FTYPE_PV16SI_V16SI_HI:
38046 case VOID_FTYPE_PV8SI_V8SI_QI:
38047 case VOID_FTYPE_PV4SI_V4SI_QI:
38050 /* These builtins and instructions require the memory
38051 to be properly aligned.  */
38052 case CODE_FOR_avx512f_storev16sf_mask:
38053 case CODE_FOR_avx512f_storev16si_mask:
38054 case CODE_FOR_avx512f_storev8df_mask:
38055 case CODE_FOR_avx512f_storev8di_mask:
38056 case CODE_FOR_avx512vl_storev8sf_mask:
38057 case CODE_FOR_avx512vl_storev8si_mask:
38058 case CODE_FOR_avx512vl_storev4df_mask:
38059 case CODE_FOR_avx512vl_storev4di_mask:
38060 case CODE_FOR_avx512vl_storev4sf_mask:
38061 case CODE_FOR_avx512vl_storev4si_mask:
38062 case CODE_FOR_avx512vl_storev2df_mask:
38063 case CODE_FOR_avx512vl_storev2di_mask:
38064 aligned_mem = true;
38070 case VOID_FTYPE_PV8SF_V8SI_V8SF:
38071 case VOID_FTYPE_PV4DF_V4DI_V4DF:
38072 case VOID_FTYPE_PV4SF_V4SI_V4SF:
38073 case VOID_FTYPE_PV2DF_V2DI_V2DF:
38074 case VOID_FTYPE_PV8SI_V8SI_V8SI:
38075 case VOID_FTYPE_PV4DI_V4DI_V4DI:
38076 case VOID_FTYPE_PV4SI_V4SI_V4SI:
38077 case VOID_FTYPE_PV2DI_V2DI_V2DI:
38078 case VOID_FTYPE_PDOUBLE_V2DF_QI:
38079 case VOID_FTYPE_PFLOAT_V4SF_QI:
38080 case VOID_FTYPE_PV8SI_V8DI_QI:
38081 case VOID_FTYPE_PV8HI_V8DI_QI:
38082 case VOID_FTYPE_PV16HI_V16SI_HI:
38083 case VOID_FTYPE_PV16QI_V8DI_QI:
38084 case VOID_FTYPE_PV16QI_V16SI_HI:
38085 case VOID_FTYPE_PV4SI_V4DI_QI:
38086 case VOID_FTYPE_PV4SI_V2DI_QI:
38087 case VOID_FTYPE_PV8HI_V4DI_QI:
38088 case VOID_FTYPE_PV8HI_V2DI_QI:
38089 case VOID_FTYPE_PV8HI_V8SI_QI:
38090 case VOID_FTYPE_PV8HI_V4SI_QI:
38091 case VOID_FTYPE_PV16QI_V4DI_QI:
38092 case VOID_FTYPE_PV16QI_V2DI_QI:
38093 case VOID_FTYPE_PV16QI_V8SI_QI:
38094 case VOID_FTYPE_PV16QI_V4SI_QI:
38095 case VOID_FTYPE_PV8HI_V8HI_QI:
38096 case VOID_FTYPE_PV16HI_V16HI_HI:
38097 case VOID_FTYPE_PV32HI_V32HI_SI:
38098 case VOID_FTYPE_PV16QI_V16QI_HI:
38099 case VOID_FTYPE_PV32QI_V32QI_SI:
38100 case VOID_FTYPE_PV64QI_V64QI_DI:
38101 case VOID_FTYPE_PV4DF_V4DF_QI:
38102 case VOID_FTYPE_PV2DF_V2DF_QI:
38103 case VOID_FTYPE_PV8SF_V8SF_QI:
38104 case VOID_FTYPE_PV4SF_V4SF_QI:
38107 /* Reserve memory operand for target.  */
38108 memory = ARRAY_SIZE (args);
/* Masked loads.  */
38110 case V4SF_FTYPE_PCV4SF_V4SF_QI:
38111 case V8SF_FTYPE_PCV8SF_V8SF_QI:
38112 case V16SF_FTYPE_PCV16SF_V16SF_HI:
38113 case V4SI_FTYPE_PCV4SI_V4SI_QI:
38114 case V8SI_FTYPE_PCV8SI_V8SI_QI:
38115 case V16SI_FTYPE_PCV16SI_V16SI_HI:
38116 case V2DF_FTYPE_PCV2DF_V2DF_QI:
38117 case V4DF_FTYPE_PCV4DF_V4DF_QI:
38118 case V8DF_FTYPE_PCV8DF_V8DF_QI:
38119 case V2DI_FTYPE_PCV2DI_V2DI_QI:
38120 case V4DI_FTYPE_PCV4DI_V4DI_QI:
38121 case V8DI_FTYPE_PCV8DI_V8DI_QI:
38122 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
38123 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
38124 case V8HI_FTYPE_PCV8HI_V8HI_QI:
38125 case V16HI_FTYPE_PCV16HI_V16HI_HI:
38126 case V32HI_FTYPE_PCV32HI_V32HI_SI:
38127 case V16QI_FTYPE_PCV16QI_V16QI_HI:
38128 case V32QI_FTYPE_PCV32QI_V32QI_SI:
38129 case V64QI_FTYPE_PCV64QI_V64QI_DI:
38135 /* These builtins and instructions require the memory
38136 to be properly aligned.  */
38137 case CODE_FOR_avx512f_loadv16sf_mask:
38138 case CODE_FOR_avx512f_loadv16si_mask:
38139 case CODE_FOR_avx512f_loadv8df_mask:
38140 case CODE_FOR_avx512f_loadv8di_mask:
38141 case CODE_FOR_avx512vl_loadv8sf_mask:
38142 case CODE_FOR_avx512vl_loadv8si_mask:
38143 case CODE_FOR_avx512vl_loadv4df_mask:
38144 case CODE_FOR_avx512vl_loadv4di_mask:
38145 case CODE_FOR_avx512vl_loadv4sf_mask:
38146 case CODE_FOR_avx512vl_loadv4si_mask:
38147 case CODE_FOR_avx512vl_loadv2df_mask:
38148 case CODE_FOR_avx512vl_loadv2di_mask:
38149 case CODE_FOR_avx512bw_loadv64qi_mask:
38150 case CODE_FOR_avx512vl_loadv32qi_mask:
38151 case CODE_FOR_avx512vl_loadv16qi_mask:
38152 case CODE_FOR_avx512bw_loadv32hi_mask:
38153 case CODE_FOR_avx512vl_loadv16hi_mask:
38154 case CODE_FOR_avx512vl_loadv8hi_mask:
38155 aligned_mem = true;
38161 case VOID_FTYPE_UINT_UINT_UINT:
38162 case VOID_FTYPE_UINT64_UINT_UINT:
38163 case UCHAR_FTYPE_UINT_UINT_UINT:
38164 case UCHAR_FTYPE_UINT64_UINT_UINT:
38167 memory = ARRAY_SIZE (args);
38168 last_arg_constant = true;
38171 gcc_unreachable ();
38174 gcc_assert (nargs <= ARRAY_SIZE (args));
/* For stores, the first call argument is the destination address; wrap it
   in a MEM and make that the insn's "target".  */
38176 if (klass == store)
38178 arg = CALL_EXPR_ARG (exp, 0);
38179 op = expand_normal (arg);
38180 gcc_assert (target == 0);
38183 op = ix86_zero_extend_to_Pmode (op);
38184 target = gen_rtx_MEM (tmode, op);
38185 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
38186 on it.  Try to improve it using get_pointer_alignment,
38187 and if the special builtin is one that requires strict
38188 mode alignment, also from its GET_MODE_ALIGNMENT.
38189 Failure to do so could lead to ix86_legitimate_combined_insn
38190 rejecting all changes to such insns.  */
38191 unsigned int align = get_pointer_alignment (arg);
38192 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
38193 align = GET_MODE_ALIGNMENT (tmode);
38194 if (MEM_ALIGN (target) < align)
38195 set_mem_align (target, align);
38198 target = force_reg (tmode, op);
/* For loads, ensure TARGET is a usable register of the result mode.  */
38206 || !register_operand (target, tmode)
38207 || GET_MODE (target) != tmode
38208 target = gen_reg_rtx (tmode);
/* Expand the remaining arguments into insn operands.  */
38211 for (i = 0; i < nargs; i++)
38213 machine_mode mode = insn_p->operand[i + 1].mode;
38216 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
38217 op = expand_normal (arg);
38218 match = insn_p->operand[i + 1].predicate (op, mode);
38220 if (last_arg_constant && (i + 1) == nargs)
/* LWP insns take a 32-bit immediate; everything else an 8-bit one.  */
38224 if (icode == CODE_FOR_lwp_lwpvalsi3
38225 || icode == CODE_FOR_lwp_lwpinssi3
38226 || icode == CODE_FOR_lwp_lwpvaldi3
38227 || icode == CODE_FOR_lwp_lwpinsdi3)
38228 error ("the last argument must be a 32-bit immediate");
38230 error ("the last argument must be an 8-bit immediate");
38238 /* This must be the memory operand.  */
38239 op = ix86_zero_extend_to_Pmode (op);
38240 op = gen_rtx_MEM (mode, op);
38241 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
38242 on it.  Try to improve it using get_pointer_alignment,
38243 and if the special builtin is one that requires strict
38244 mode alignment, also from its GET_MODE_ALIGNMENT.
38245 Failure to do so could lead to ix86_legitimate_combined_insn
38246 rejecting all changes to such insns.  */
38247 unsigned int align = get_pointer_alignment (arg);
38248 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
38249 align = GET_MODE_ALIGNMENT (mode);
38250 if (MEM_ALIGN (op) < align)
38251 set_mem_align (op, align);
38255 /* This must be register.  */
38256 if (VECTOR_MODE_P (mode))
38257 op = safe_vector_operand (op, mode);
38259 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38260 op = copy_to_mode_reg (mode, op);
/* Mode mismatch: copy to a register and reinterpret via a subreg.  */
38263 op = copy_to_reg (op);
38264 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38270 args[i].mode = mode;
/* Emit the insn with the arity determined above.  */
38276 pat = GEN_FCN (icode) (target);
38279 pat = GEN_FCN (icode) (target, args[0].op);
38282 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38285 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
38288 gcc_unreachable ();
/* Stores have no value; loads return the register holding the result.  */
38294 return klass == store ? 0 : target;
38297 /* Return the integer constant in ARG.  Constrain it to be in the range
38298 of the subparts of VEC_TYPE; issue an error if not.  */
38301 get_element_number (tree vec_type, tree arg)
38303 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* Reject non-constant selectors and selectors outside [0, max].  */
38305 if (!tree_fits_uhwi_p (arg)
38306 || (elt = tree_to_uhwi (arg), elt > max))
38308 error ("selector must be an integer constant in the range 0..%wi", max);
38315 /* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
38316 ix86_expand_vector_init.  We DO have language-level syntax for this, in
38317 the form of (type){ init-list }.  Except that since we can't place emms
38318 instructions from inside the compiler, we can't allow the use of MMX
38319 registers unless the user explicitly asks for it.  So we do *not* define
38320 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
38321 we have builtins invoked by mmintrin.h that gives us license to emit
38322 these sorts of instructions.  */
38325 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
38327 machine_mode tmode = TYPE_MODE (type);
38328 machine_mode inner_mode = GET_MODE_INNER (tmode);
38329 int i, n_elt = GET_MODE_NUNITS (tmode);
38330 rtvec v = rtvec_alloc (n_elt);
/* The call must supply exactly one scalar per vector element.  */
38332 gcc_assert (VECTOR_MODE_P (tmode));
38333 gcc_assert (call_expr_nargs (exp) == n_elt);
/* Expand each initializer argument to the element mode.  */
38335 for (i = 0; i < n_elt; ++i)
38337 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
38338 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
38341 if (!target || !register_operand (target, tmode))
38342 target = gen_reg_rtx (tmode);
38344 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
38348 /* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
38349 ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
38350 had a language-level syntax for referencing vector elements.  */
38353 ix86_expand_vec_ext_builtin (tree exp, rtx target)
38355 machine_mode tmode, mode0;
38360 arg0 = CALL_EXPR_ARG (exp, 0);
38361 arg1 = CALL_EXPR_ARG (exp, 1);
38363 op0 = expand_normal (arg0);
/* Validate the constant element index against the vector type.  */
38364 elt = get_element_number (TREE_TYPE (arg0), arg1);
/* tmode is the element mode, mode0 the full vector mode.  */
38366 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38367 mode0 = TYPE_MODE (TREE_TYPE (arg0));
38368 gcc_assert (VECTOR_MODE_P (mode0));
38370 op0 = force_reg (mode0, op0);
38372 if (optimize || !target || !register_operand (target, tmode))
38373 target = gen_reg_rtx (tmode);
38375 ix86_expand_vector_extract (true, target, op0, elt);
38380 /* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
38381 ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
38382 a language-level syntax for referencing vector elements.  */
38385 ix86_expand_vec_set_builtin (tree exp)
38387 machine_mode tmode, mode1;
38388 tree arg0, arg1, arg2;
38390 rtx op0, op1, target;
38392 arg0 = CALL_EXPR_ARG (exp, 0);
38393 arg1 = CALL_EXPR_ARG (exp, 1);
38394 arg2 = CALL_EXPR_ARG (exp, 2);
/* tmode is the vector mode, mode1 the element mode.  */
38396 tmode = TYPE_MODE (TREE_TYPE (arg0));
38397 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38398 gcc_assert (VECTOR_MODE_P (tmode));
38400 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
38401 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
/* Validate the constant element index against the vector type.  */
38402 elt = get_element_number (TREE_TYPE (arg0), arg2);
38404 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
38405 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
38407 op0 = force_reg (tmode, op0);
38408 op1 = force_reg (mode1, op1);
38410 /* OP0 is the source of these builtin functions and shouldn't be
38411 modified.  Create a copy, use it and return it as target.  */
38412 target = gen_reg_rtx (tmode);
38413 emit_move_insn (target, op0);
38414 ix86_expand_vector_set (true, target, op1, elt);
38419 /* Emit conditional move of SRC to DST with condition
/* Conditionally moves SRC into DST when (OP1 CODE OP2) holds.  Two
   strategies are visible below: an IF_THEN_ELSE set (conditional move)
   and a compare-and-branch around a plain move.  NOTE(review): the guard
   selecting between them is elided from this listing; presumably it tests
   for conditional-move support — confirm against the full source.  */
38422 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
/* Conditional-move form: DST = cond ? SRC : DST.  */
38428 t = ix86_expand_compare (code, op1, op2);
38429 emit_insn (gen_rtx_SET (VOIDmode, dst,
38430 gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
/* Branch form: skip the move when the reversed condition holds.  */
38435 rtx nomove = gen_label_rtx ();
38436 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
38437 const0_rtx, GET_MODE (op1), 1, nomove);
38438 emit_move_insn (dst, src);
38439 emit_label (nomove);
38443 /* Choose max of DST and SRC and put it to DST.  */
/* Unsigned maximum: copy SRC into DST when DST < SRC (LTU).  */
38445 ix86_emit_move_max (rtx dst, rtx src)
38447 ix86_emit_cmove (dst, src, LTU, dst, src);
38450 /* Expand an expression EXP that calls a built-in function,
38451 with result going to TARGET if that's convenient
38452 (and in mode MODE if that's convenient).
38453 SUBTARGET may be used as the target for computing one of EXP's operands.
38454 IGNORE is nonzero if the value is to be ignored. */
38457 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
38458 machine_mode mode, int ignore)
38460 const struct builtin_description *d;
38462 enum insn_code icode;
38463 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
38464 tree arg0, arg1, arg2, arg3, arg4;
38465 rtx op0, op1, op2, op3, op4, pat, insn;
38466 machine_mode mode0, mode1, mode2, mode3, mode4;
38467 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
38469 /* For CPU builtins that can be folded, fold first and expand the fold. */
38472 case IX86_BUILTIN_CPU_INIT:
38474 /* Make it call __cpu_indicator_init in libgcc. */
38475 tree call_expr, fndecl, type;
38476 type = build_function_type_list (integer_type_node, NULL_TREE);
38477 fndecl = build_fn_decl ("__cpu_indicator_init", type);
38478 call_expr = build_call_expr (fndecl, 0);
38479 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
38481 case IX86_BUILTIN_CPU_IS:
38482 case IX86_BUILTIN_CPU_SUPPORTS:
38484 tree arg0 = CALL_EXPR_ARG (exp, 0);
38485 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
38486 gcc_assert (fold_expr != NULL_TREE);
38487 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
38491 /* Determine whether the builtin function is available under the current ISA.
38492 Originally the builtin was not created if it wasn't applicable to the
38493 current ISA based on the command line switches. With function specific
38494 options, we need to check in the context of the function making the call
38495 whether it is supported. */
38496 if (ix86_builtins_isa[fcode].isa
38497 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
38499 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
38500 NULL, (enum fpmath_unit) 0, false);
38503 error ("%qE needs unknown isa option", fndecl);
38506 gcc_assert (opts != NULL);
38507 error ("%qE needs isa option %s", fndecl, opts);
38515 case IX86_BUILTIN_BNDMK:
38517 || GET_MODE (target) != BNDmode
38518 || !register_operand (target, BNDmode))
38519 target = gen_reg_rtx (BNDmode);
38521 arg0 = CALL_EXPR_ARG (exp, 0);
38522 arg1 = CALL_EXPR_ARG (exp, 1);
38524 op0 = expand_normal (arg0);
38525 op1 = expand_normal (arg1);
38527 if (!register_operand (op0, Pmode))
38528 op0 = ix86_zero_extend_to_Pmode (op0);
38529 if (!register_operand (op1, Pmode))
38530 op1 = ix86_zero_extend_to_Pmode (op1);
38532 /* Builtin arg1 is size of block but instruction op1 should
38534 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
38535 NULL_RTX, 1, OPTAB_DIRECT);
38537 emit_insn (BNDmode == BND64mode
38538 ? gen_bnd64_mk (target, op0, op1)
38539 : gen_bnd32_mk (target, op0, op1));
38542 case IX86_BUILTIN_BNDSTX:
38543 arg0 = CALL_EXPR_ARG (exp, 0);
38544 arg1 = CALL_EXPR_ARG (exp, 1);
38545 arg2 = CALL_EXPR_ARG (exp, 2);
38547 op0 = expand_normal (arg0);
38548 op1 = expand_normal (arg1);
38549 op2 = expand_normal (arg2);
38551 if (!register_operand (op0, Pmode))
38552 op0 = ix86_zero_extend_to_Pmode (op0);
38553 if (!register_operand (op1, BNDmode))
38554 op1 = copy_to_mode_reg (BNDmode, op1);
38555 if (!register_operand (op2, Pmode))
38556 op2 = ix86_zero_extend_to_Pmode (op2);
38558 emit_insn (BNDmode == BND64mode
38559 ? gen_bnd64_stx (op2, op0, op1)
38560 : gen_bnd32_stx (op2, op0, op1));
38563 case IX86_BUILTIN_BNDLDX:
38565 || GET_MODE (target) != BNDmode
38566 || !register_operand (target, BNDmode))
38567 target = gen_reg_rtx (BNDmode);
38569 arg0 = CALL_EXPR_ARG (exp, 0);
38570 arg1 = CALL_EXPR_ARG (exp, 1);
38572 op0 = expand_normal (arg0);
38573 op1 = expand_normal (arg1);
38575 if (!register_operand (op0, Pmode))
38576 op0 = ix86_zero_extend_to_Pmode (op0);
38577 if (!register_operand (op1, Pmode))
38578 op1 = ix86_zero_extend_to_Pmode (op1);
38580 emit_insn (BNDmode == BND64mode
38581 ? gen_bnd64_ldx (target, op0, op1)
38582 : gen_bnd32_ldx (target, op0, op1));
38585 case IX86_BUILTIN_BNDCL:
38586 arg0 = CALL_EXPR_ARG (exp, 0);
38587 arg1 = CALL_EXPR_ARG (exp, 1);
38589 op0 = expand_normal (arg0);
38590 op1 = expand_normal (arg1);
38592 if (!register_operand (op0, Pmode))
38593 op0 = ix86_zero_extend_to_Pmode (op0);
38594 if (!register_operand (op1, BNDmode))
38595 op1 = copy_to_mode_reg (BNDmode, op1);
38597 emit_insn (BNDmode == BND64mode
38598 ? gen_bnd64_cl (op1, op0)
38599 : gen_bnd32_cl (op1, op0));
38602 case IX86_BUILTIN_BNDCU:
38603 arg0 = CALL_EXPR_ARG (exp, 0);
38604 arg1 = CALL_EXPR_ARG (exp, 1);
38606 op0 = expand_normal (arg0);
38607 op1 = expand_normal (arg1);
38609 if (!register_operand (op0, Pmode))
38610 op0 = ix86_zero_extend_to_Pmode (op0);
38611 if (!register_operand (op1, BNDmode))
38612 op1 = copy_to_mode_reg (BNDmode, op1);
38614 emit_insn (BNDmode == BND64mode
38615 ? gen_bnd64_cu (op1, op0)
38616 : gen_bnd32_cu (op1, op0));
38619 case IX86_BUILTIN_BNDRET:
38620 arg0 = CALL_EXPR_ARG (exp, 0);
38621 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
38622 target = chkp_get_rtl_bounds (arg0);
38624 /* If no bounds were specified for returned value,
38625 then use INIT bounds. It usually happens when
38626 some built-in function is expanded. */
38629 rtx t1 = gen_reg_rtx (Pmode);
38630 rtx t2 = gen_reg_rtx (Pmode);
38631 target = gen_reg_rtx (BNDmode);
38632 emit_move_insn (t1, const0_rtx);
38633 emit_move_insn (t2, constm1_rtx);
38634 emit_insn (BNDmode == BND64mode
38635 ? gen_bnd64_mk (target, t1, t2)
38636 : gen_bnd32_mk (target, t1, t2));
38639 gcc_assert (target && REG_P (target));
38642 case IX86_BUILTIN_BNDNARROW:
38644 rtx m1, m1h1, m1h2, lb, ub, t1;
38646 /* Return value and lb. */
38647 arg0 = CALL_EXPR_ARG (exp, 0);
38649 arg1 = CALL_EXPR_ARG (exp, 1);
38651 arg2 = CALL_EXPR_ARG (exp, 2);
38653 lb = expand_normal (arg0);
38654 op1 = expand_normal (arg1);
38655 op2 = expand_normal (arg2);
38657 /* Size was passed but we need to use (size - 1) as for bndmk. */
38658 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
38659 NULL_RTX, 1, OPTAB_DIRECT);
38661 /* Add LB to size and inverse to get UB. */
38662 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
38663 op2, 1, OPTAB_DIRECT);
38664 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
38666 if (!register_operand (lb, Pmode))
38667 lb = ix86_zero_extend_to_Pmode (lb);
38668 if (!register_operand (ub, Pmode))
38669 ub = ix86_zero_extend_to_Pmode (ub);
38671 /* We need to move bounds to memory before any computations. */
38676 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
38677 emit_move_insn (m1, op1);
38680 /* Generate mem expression to be used for access to LB and UB. */
38681 m1h1 = adjust_address (m1, Pmode, 0);
38682 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
38684 t1 = gen_reg_rtx (Pmode);
38687 emit_move_insn (t1, m1h1);
38688 ix86_emit_move_max (t1, lb);
38689 emit_move_insn (m1h1, t1);
38691 /* Compute UB. UB is stored in 1's complement form. Therefore
38692 we also use max here. */
38693 emit_move_insn (t1, m1h2);
38694 ix86_emit_move_max (t1, ub);
38695 emit_move_insn (m1h2, t1);
38697 op2 = gen_reg_rtx (BNDmode);
38698 emit_move_insn (op2, m1);
38700 return chkp_join_splitted_slot (lb, op2);
38703 case IX86_BUILTIN_BNDINT:
38705 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
38708 || GET_MODE (target) != BNDmode
38709 || !register_operand (target, BNDmode))
38710 target = gen_reg_rtx (BNDmode);
38712 arg0 = CALL_EXPR_ARG (exp, 0);
38713 arg1 = CALL_EXPR_ARG (exp, 1);
38715 op0 = expand_normal (arg0);
38716 op1 = expand_normal (arg1);
38718 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
38719 rh1 = adjust_address (res, Pmode, 0);
38720 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
38722 /* Put first bounds to temporaries. */
38723 lb1 = gen_reg_rtx (Pmode);
38724 ub1 = gen_reg_rtx (Pmode);
38727 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
38728 emit_move_insn (ub1, adjust_address (op0, Pmode,
38729 GET_MODE_SIZE (Pmode)));
38733 emit_move_insn (res, op0);
38734 emit_move_insn (lb1, rh1);
38735 emit_move_insn (ub1, rh2);
38738 /* Put second bounds to temporaries. */
38739 lb2 = gen_reg_rtx (Pmode);
38740 ub2 = gen_reg_rtx (Pmode);
38743 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
38744 emit_move_insn (ub2, adjust_address (op1, Pmode,
38745 GET_MODE_SIZE (Pmode)));
38749 emit_move_insn (res, op1);
38750 emit_move_insn (lb2, rh1);
38751 emit_move_insn (ub2, rh2);
38755 ix86_emit_move_max (lb1, lb2);
38756 emit_move_insn (rh1, lb1);
38758 /* Compute UB. UB is stored in 1's complement form. Therefore
38759 we also use max here. */
38760 ix86_emit_move_max (ub1, ub2);
38761 emit_move_insn (rh2, ub1);
38763 emit_move_insn (target, res);
38768 case IX86_BUILTIN_SIZEOF:
38774 || GET_MODE (target) != Pmode
38775 || !register_operand (target, Pmode))
38776 target = gen_reg_rtx (Pmode);
38778 arg0 = CALL_EXPR_ARG (exp, 0);
38779 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
38781 name = DECL_ASSEMBLER_NAME (arg0);
38782 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
38784 emit_insn (Pmode == SImode
38785 ? gen_move_size_reloc_si (target, symbol)
38786 : gen_move_size_reloc_di (target, symbol));
38791 case IX86_BUILTIN_BNDLOWER:
38796 || GET_MODE (target) != Pmode
38797 || !register_operand (target, Pmode))
38798 target = gen_reg_rtx (Pmode);
38800 arg0 = CALL_EXPR_ARG (exp, 0);
38801 op0 = expand_normal (arg0);
38803 /* We need to move bounds to memory first. */
38808 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38809 emit_move_insn (mem, op0);
38812 /* Generate mem expression to access LB and load it. */
38813 hmem = adjust_address (mem, Pmode, 0);
38814 emit_move_insn (target, hmem);
38819 case IX86_BUILTIN_BNDUPPER:
38821 rtx mem, hmem, res;
38824 || GET_MODE (target) != Pmode
38825 || !register_operand (target, Pmode))
38826 target = gen_reg_rtx (Pmode);
38828 arg0 = CALL_EXPR_ARG (exp, 0);
38829 op0 = expand_normal (arg0);
38831 /* We need to move bounds to memory first. */
38836 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38837 emit_move_insn (mem, op0);
38840 /* Generate mem expression to access UB. */
38841 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
38843 /* We need to inverse all bits of UB. */
38844 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
38847 emit_move_insn (target, res);
38852 case IX86_BUILTIN_MASKMOVQ:
38853 case IX86_BUILTIN_MASKMOVDQU:
38854 icode = (fcode == IX86_BUILTIN_MASKMOVQ
38855 ? CODE_FOR_mmx_maskmovq
38856 : CODE_FOR_sse2_maskmovdqu);
38857 /* Note the arg order is different from the operand order. */
38858 arg1 = CALL_EXPR_ARG (exp, 0);
38859 arg2 = CALL_EXPR_ARG (exp, 1);
38860 arg0 = CALL_EXPR_ARG (exp, 2);
38861 op0 = expand_normal (arg0);
38862 op1 = expand_normal (arg1);
38863 op2 = expand_normal (arg2);
38864 mode0 = insn_data[icode].operand[0].mode;
38865 mode1 = insn_data[icode].operand[1].mode;
38866 mode2 = insn_data[icode].operand[2].mode;
38868 op0 = ix86_zero_extend_to_Pmode (op0);
38869 op0 = gen_rtx_MEM (mode1, op0);
38871 if (!insn_data[icode].operand[0].predicate (op0, mode0))
38872 op0 = copy_to_mode_reg (mode0, op0);
38873 if (!insn_data[icode].operand[1].predicate (op1, mode1))
38874 op1 = copy_to_mode_reg (mode1, op1);
38875 if (!insn_data[icode].operand[2].predicate (op2, mode2))
38876 op2 = copy_to_mode_reg (mode2, op2);
38877 pat = GEN_FCN (icode) (op0, op1, op2);
38883 case IX86_BUILTIN_LDMXCSR:
38884 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
38885 target = assign_386_stack_local (SImode, SLOT_TEMP);
38886 emit_move_insn (target, op0);
38887 emit_insn (gen_sse_ldmxcsr (target));
38890 case IX86_BUILTIN_STMXCSR:
38891 target = assign_386_stack_local (SImode, SLOT_TEMP);
38892 emit_insn (gen_sse_stmxcsr (target));
38893 return copy_to_mode_reg (SImode, target);
38895 case IX86_BUILTIN_CLFLUSH:
38896 arg0 = CALL_EXPR_ARG (exp, 0);
38897 op0 = expand_normal (arg0);
38898 icode = CODE_FOR_sse2_clflush;
38899 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38900 op0 = ix86_zero_extend_to_Pmode (op0);
38902 emit_insn (gen_sse2_clflush (op0));
38905 case IX86_BUILTIN_CLWB:
38906 arg0 = CALL_EXPR_ARG (exp, 0);
38907 op0 = expand_normal (arg0);
38908 icode = CODE_FOR_clwb;
38909 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38910 op0 = ix86_zero_extend_to_Pmode (op0);
38912 emit_insn (gen_clwb (op0));
38915 case IX86_BUILTIN_CLFLUSHOPT:
38916 arg0 = CALL_EXPR_ARG (exp, 0);
38917 op0 = expand_normal (arg0);
38918 icode = CODE_FOR_clflushopt;
38919 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38920 op0 = ix86_zero_extend_to_Pmode (op0);
38922 emit_insn (gen_clflushopt (op0));
38925 case IX86_BUILTIN_MONITOR:
38926 arg0 = CALL_EXPR_ARG (exp, 0);
38927 arg1 = CALL_EXPR_ARG (exp, 1);
38928 arg2 = CALL_EXPR_ARG (exp, 2);
38929 op0 = expand_normal (arg0);
38930 op1 = expand_normal (arg1);
38931 op2 = expand_normal (arg2);
38933 op0 = ix86_zero_extend_to_Pmode (op0);
38935 op1 = copy_to_mode_reg (SImode, op1);
38937 op2 = copy_to_mode_reg (SImode, op2);
38938 emit_insn (ix86_gen_monitor (op0, op1, op2));
38941 case IX86_BUILTIN_MWAIT:
38942 arg0 = CALL_EXPR_ARG (exp, 0);
38943 arg1 = CALL_EXPR_ARG (exp, 1);
38944 op0 = expand_normal (arg0);
38945 op1 = expand_normal (arg1);
38947 op0 = copy_to_mode_reg (SImode, op0);
38949 op1 = copy_to_mode_reg (SImode, op1);
38950 emit_insn (gen_sse3_mwait (op0, op1));
38953 case IX86_BUILTIN_VEC_INIT_V2SI:
38954 case IX86_BUILTIN_VEC_INIT_V4HI:
38955 case IX86_BUILTIN_VEC_INIT_V8QI:
38956 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
38958 case IX86_BUILTIN_VEC_EXT_V2DF:
38959 case IX86_BUILTIN_VEC_EXT_V2DI:
38960 case IX86_BUILTIN_VEC_EXT_V4SF:
38961 case IX86_BUILTIN_VEC_EXT_V4SI:
38962 case IX86_BUILTIN_VEC_EXT_V8HI:
38963 case IX86_BUILTIN_VEC_EXT_V2SI:
38964 case IX86_BUILTIN_VEC_EXT_V4HI:
38965 case IX86_BUILTIN_VEC_EXT_V16QI:
38966 return ix86_expand_vec_ext_builtin (exp, target);
38968 case IX86_BUILTIN_VEC_SET_V2DI:
38969 case IX86_BUILTIN_VEC_SET_V4SF:
38970 case IX86_BUILTIN_VEC_SET_V4SI:
38971 case IX86_BUILTIN_VEC_SET_V8HI:
38972 case IX86_BUILTIN_VEC_SET_V4HI:
38973 case IX86_BUILTIN_VEC_SET_V16QI:
38974 return ix86_expand_vec_set_builtin (exp);
38976 case IX86_BUILTIN_INFQ:
38977 case IX86_BUILTIN_HUGE_VALQ:
38979 REAL_VALUE_TYPE inf;
38983 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
38985 tmp = validize_mem (force_const_mem (mode, tmp));
38988 target = gen_reg_rtx (mode);
38990 emit_move_insn (target, tmp);
38994 case IX86_BUILTIN_RDPMC:
38995 case IX86_BUILTIN_RDTSC:
38996 case IX86_BUILTIN_RDTSCP:
38998 op0 = gen_reg_rtx (DImode);
38999 op1 = gen_reg_rtx (DImode);
39001 if (fcode == IX86_BUILTIN_RDPMC)
39003 arg0 = CALL_EXPR_ARG (exp, 0);
39004 op2 = expand_normal (arg0);
39005 if (!register_operand (op2, SImode))
39006 op2 = copy_to_mode_reg (SImode, op2);
39008 insn = (TARGET_64BIT
39009 ? gen_rdpmc_rex64 (op0, op1, op2)
39010 : gen_rdpmc (op0, op2));
39013 else if (fcode == IX86_BUILTIN_RDTSC)
39015 insn = (TARGET_64BIT
39016 ? gen_rdtsc_rex64 (op0, op1)
39017 : gen_rdtsc (op0));
39022 op2 = gen_reg_rtx (SImode);
39024 insn = (TARGET_64BIT
39025 ? gen_rdtscp_rex64 (op0, op1, op2)
39026 : gen_rdtscp (op0, op2));
39029 arg0 = CALL_EXPR_ARG (exp, 0);
39030 op4 = expand_normal (arg0);
39031 if (!address_operand (op4, VOIDmode))
39033 op4 = convert_memory_address (Pmode, op4);
39034 op4 = copy_addr_to_reg (op4);
39036 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
39041 /* mode is VOIDmode if __builtin_rd* has been called
39043 if (mode == VOIDmode)
39045 target = gen_reg_rtx (mode);
39050 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
39051 op1, 1, OPTAB_DIRECT);
39052 op0 = expand_simple_binop (DImode, IOR, op0, op1,
39053 op0, 1, OPTAB_DIRECT);
39056 emit_move_insn (target, op0);
39059 case IX86_BUILTIN_FXSAVE:
39060 case IX86_BUILTIN_FXRSTOR:
39061 case IX86_BUILTIN_FXSAVE64:
39062 case IX86_BUILTIN_FXRSTOR64:
39063 case IX86_BUILTIN_FNSTENV:
39064 case IX86_BUILTIN_FLDENV:
39068 case IX86_BUILTIN_FXSAVE:
39069 icode = CODE_FOR_fxsave;
39071 case IX86_BUILTIN_FXRSTOR:
39072 icode = CODE_FOR_fxrstor;
39074 case IX86_BUILTIN_FXSAVE64:
39075 icode = CODE_FOR_fxsave64;
39077 case IX86_BUILTIN_FXRSTOR64:
39078 icode = CODE_FOR_fxrstor64;
39080 case IX86_BUILTIN_FNSTENV:
39081 icode = CODE_FOR_fnstenv;
39083 case IX86_BUILTIN_FLDENV:
39084 icode = CODE_FOR_fldenv;
39087 gcc_unreachable ();
39090 arg0 = CALL_EXPR_ARG (exp, 0);
39091 op0 = expand_normal (arg0);
39093 if (!address_operand (op0, VOIDmode))
39095 op0 = convert_memory_address (Pmode, op0);
39096 op0 = copy_addr_to_reg (op0);
39098 op0 = gen_rtx_MEM (mode0, op0);
39100 pat = GEN_FCN (icode) (op0);
39105 case IX86_BUILTIN_XSAVE:
39106 case IX86_BUILTIN_XRSTOR:
39107 case IX86_BUILTIN_XSAVE64:
39108 case IX86_BUILTIN_XRSTOR64:
39109 case IX86_BUILTIN_XSAVEOPT:
39110 case IX86_BUILTIN_XSAVEOPT64:
39111 case IX86_BUILTIN_XSAVES:
39112 case IX86_BUILTIN_XRSTORS:
39113 case IX86_BUILTIN_XSAVES64:
39114 case IX86_BUILTIN_XRSTORS64:
39115 case IX86_BUILTIN_XSAVEC:
39116 case IX86_BUILTIN_XSAVEC64:
39117 arg0 = CALL_EXPR_ARG (exp, 0);
39118 arg1 = CALL_EXPR_ARG (exp, 1);
39119 op0 = expand_normal (arg0);
39120 op1 = expand_normal (arg1);
39122 if (!address_operand (op0, VOIDmode))
39124 op0 = convert_memory_address (Pmode, op0);
39125 op0 = copy_addr_to_reg (op0);
39127 op0 = gen_rtx_MEM (BLKmode, op0);
39129 op1 = force_reg (DImode, op1);
39133 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
39134 NULL, 1, OPTAB_DIRECT);
39137 case IX86_BUILTIN_XSAVE:
39138 icode = CODE_FOR_xsave_rex64;
39140 case IX86_BUILTIN_XRSTOR:
39141 icode = CODE_FOR_xrstor_rex64;
39143 case IX86_BUILTIN_XSAVE64:
39144 icode = CODE_FOR_xsave64;
39146 case IX86_BUILTIN_XRSTOR64:
39147 icode = CODE_FOR_xrstor64;
39149 case IX86_BUILTIN_XSAVEOPT:
39150 icode = CODE_FOR_xsaveopt_rex64;
39152 case IX86_BUILTIN_XSAVEOPT64:
39153 icode = CODE_FOR_xsaveopt64;
39155 case IX86_BUILTIN_XSAVES:
39156 icode = CODE_FOR_xsaves_rex64;
39158 case IX86_BUILTIN_XRSTORS:
39159 icode = CODE_FOR_xrstors_rex64;
39161 case IX86_BUILTIN_XSAVES64:
39162 icode = CODE_FOR_xsaves64;
39164 case IX86_BUILTIN_XRSTORS64:
39165 icode = CODE_FOR_xrstors64;
39167 case IX86_BUILTIN_XSAVEC:
39168 icode = CODE_FOR_xsavec_rex64;
39170 case IX86_BUILTIN_XSAVEC64:
39171 icode = CODE_FOR_xsavec64;
39174 gcc_unreachable ();
39177 op2 = gen_lowpart (SImode, op2);
39178 op1 = gen_lowpart (SImode, op1);
39179 pat = GEN_FCN (icode) (op0, op1, op2);
39185 case IX86_BUILTIN_XSAVE:
39186 icode = CODE_FOR_xsave;
39188 case IX86_BUILTIN_XRSTOR:
39189 icode = CODE_FOR_xrstor;
39191 case IX86_BUILTIN_XSAVEOPT:
39192 icode = CODE_FOR_xsaveopt;
39194 case IX86_BUILTIN_XSAVES:
39195 icode = CODE_FOR_xsaves;
39197 case IX86_BUILTIN_XRSTORS:
39198 icode = CODE_FOR_xrstors;
39200 case IX86_BUILTIN_XSAVEC:
39201 icode = CODE_FOR_xsavec;
39204 gcc_unreachable ();
39206 pat = GEN_FCN (icode) (op0, op1);
39213 case IX86_BUILTIN_LLWPCB:
39214 arg0 = CALL_EXPR_ARG (exp, 0);
39215 op0 = expand_normal (arg0);
39216 icode = CODE_FOR_lwp_llwpcb;
39217 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39218 op0 = ix86_zero_extend_to_Pmode (op0);
39219 emit_insn (gen_lwp_llwpcb (op0));
39222 case IX86_BUILTIN_SLWPCB:
39223 icode = CODE_FOR_lwp_slwpcb;
39225 || !insn_data[icode].operand[0].predicate (target, Pmode))
39226 target = gen_reg_rtx (Pmode);
39227 emit_insn (gen_lwp_slwpcb (target));
39230 case IX86_BUILTIN_BEXTRI32:
39231 case IX86_BUILTIN_BEXTRI64:
39232 arg0 = CALL_EXPR_ARG (exp, 0);
39233 arg1 = CALL_EXPR_ARG (exp, 1);
39234 op0 = expand_normal (arg0);
39235 op1 = expand_normal (arg1);
39236 icode = (fcode == IX86_BUILTIN_BEXTRI32
39237 ? CODE_FOR_tbm_bextri_si
39238 : CODE_FOR_tbm_bextri_di);
39239 if (!CONST_INT_P (op1))
39241 error ("last argument must be an immediate");
39246 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
39247 unsigned char lsb_index = INTVAL (op1) & 0xFF;
39248 op1 = GEN_INT (length);
39249 op2 = GEN_INT (lsb_index);
39250 pat = GEN_FCN (icode) (target, op0, op1, op2);
39256 case IX86_BUILTIN_RDRAND16_STEP:
39257 icode = CODE_FOR_rdrandhi_1;
39261 case IX86_BUILTIN_RDRAND32_STEP:
39262 icode = CODE_FOR_rdrandsi_1;
39266 case IX86_BUILTIN_RDRAND64_STEP:
39267 icode = CODE_FOR_rdranddi_1;
39271 op0 = gen_reg_rtx (mode0);
39272 emit_insn (GEN_FCN (icode) (op0));
39274 arg0 = CALL_EXPR_ARG (exp, 0);
39275 op1 = expand_normal (arg0);
39276 if (!address_operand (op1, VOIDmode))
39278 op1 = convert_memory_address (Pmode, op1);
39279 op1 = copy_addr_to_reg (op1);
39281 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39283 op1 = gen_reg_rtx (SImode);
39284 emit_move_insn (op1, CONST1_RTX (SImode));
39286 /* Emit SImode conditional move. */
39287 if (mode0 == HImode)
39289 op2 = gen_reg_rtx (SImode);
39290 emit_insn (gen_zero_extendhisi2 (op2, op0));
39292 else if (mode0 == SImode)
39295 op2 = gen_rtx_SUBREG (SImode, op0, 0);
39298 || !register_operand (target, SImode))
39299 target = gen_reg_rtx (SImode);
39301 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
39303 emit_insn (gen_rtx_SET (VOIDmode, target,
39304 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
39307 case IX86_BUILTIN_RDSEED16_STEP:
39308 icode = CODE_FOR_rdseedhi_1;
39312 case IX86_BUILTIN_RDSEED32_STEP:
39313 icode = CODE_FOR_rdseedsi_1;
39317 case IX86_BUILTIN_RDSEED64_STEP:
39318 icode = CODE_FOR_rdseeddi_1;
39322 op0 = gen_reg_rtx (mode0);
39323 emit_insn (GEN_FCN (icode) (op0));
39325 arg0 = CALL_EXPR_ARG (exp, 0);
39326 op1 = expand_normal (arg0);
39327 if (!address_operand (op1, VOIDmode))
39329 op1 = convert_memory_address (Pmode, op1);
39330 op1 = copy_addr_to_reg (op1);
39332 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39334 op2 = gen_reg_rtx (QImode);
39336 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
39338 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
39341 || !register_operand (target, SImode))
39342 target = gen_reg_rtx (SImode);
39344 emit_insn (gen_zero_extendqisi2 (target, op2));
39347 case IX86_BUILTIN_SBB32:
39348 icode = CODE_FOR_subsi3_carry;
39352 case IX86_BUILTIN_SBB64:
39353 icode = CODE_FOR_subdi3_carry;
39357 case IX86_BUILTIN_ADDCARRYX32:
39358 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
39362 case IX86_BUILTIN_ADDCARRYX64:
39363 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
39367 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
39368 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
39369 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
39370 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
39372 op0 = gen_reg_rtx (QImode);
39374 /* Generate CF from input operand. */
39375 op1 = expand_normal (arg0);
39376 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
39377 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
39379 /* Gen ADCX instruction to compute X+Y+CF. */
39380 op2 = expand_normal (arg1);
39381 op3 = expand_normal (arg2);
39384 op2 = copy_to_mode_reg (mode0, op2);
39386 op3 = copy_to_mode_reg (mode0, op3);
39388 op0 = gen_reg_rtx (mode0);
39390 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
39391 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
39392 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
39394 /* Store the result. */
39395 op4 = expand_normal (arg3);
39396 if (!address_operand (op4, VOIDmode))
39398 op4 = convert_memory_address (Pmode, op4);
39399 op4 = copy_addr_to_reg (op4);
39401 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
39403 /* Return current CF value. */
39405 target = gen_reg_rtx (QImode);
39407 PUT_MODE (pat, QImode);
39408 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
39411 case IX86_BUILTIN_READ_FLAGS:
39412 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
39415 || target == NULL_RTX
39416 || !nonimmediate_operand (target, word_mode)
39417 || GET_MODE (target) != word_mode)
39418 target = gen_reg_rtx (word_mode);
39420 emit_insn (gen_pop (target));
39423 case IX86_BUILTIN_WRITE_FLAGS:
39425 arg0 = CALL_EXPR_ARG (exp, 0);
39426 op0 = expand_normal (arg0);
39427 if (!general_no_elim_operand (op0, word_mode))
39428 op0 = copy_to_mode_reg (word_mode, op0);
39430 emit_insn (gen_push (op0));
39431 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
39434 case IX86_BUILTIN_KORTESTC16:
39435 icode = CODE_FOR_kortestchi;
39440 case IX86_BUILTIN_KORTESTZ16:
39441 icode = CODE_FOR_kortestzhi;
39446 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
39447 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
39448 op0 = expand_normal (arg0);
39449 op1 = expand_normal (arg1);
39451 op0 = copy_to_reg (op0);
39452 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39453 op1 = copy_to_reg (op1);
39454 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
39456 target = gen_reg_rtx (QImode);
39457 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
39459 /* Emit kortest. */
39460 emit_insn (GEN_FCN (icode) (op0, op1));
39461 /* And use setcc to return result from flags. */
39462 ix86_expand_setcc (target, EQ,
39463 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
39466 case IX86_BUILTIN_GATHERSIV2DF:
39467 icode = CODE_FOR_avx2_gathersiv2df;
39469 case IX86_BUILTIN_GATHERSIV4DF:
39470 icode = CODE_FOR_avx2_gathersiv4df;
39472 case IX86_BUILTIN_GATHERDIV2DF:
39473 icode = CODE_FOR_avx2_gatherdiv2df;
39475 case IX86_BUILTIN_GATHERDIV4DF:
39476 icode = CODE_FOR_avx2_gatherdiv4df;
39478 case IX86_BUILTIN_GATHERSIV4SF:
39479 icode = CODE_FOR_avx2_gathersiv4sf;
39481 case IX86_BUILTIN_GATHERSIV8SF:
39482 icode = CODE_FOR_avx2_gathersiv8sf;
39484 case IX86_BUILTIN_GATHERDIV4SF:
39485 icode = CODE_FOR_avx2_gatherdiv4sf;
39487 case IX86_BUILTIN_GATHERDIV8SF:
39488 icode = CODE_FOR_avx2_gatherdiv8sf;
39490 case IX86_BUILTIN_GATHERSIV2DI:
39491 icode = CODE_FOR_avx2_gathersiv2di;
39493 case IX86_BUILTIN_GATHERSIV4DI:
39494 icode = CODE_FOR_avx2_gathersiv4di;
39496 case IX86_BUILTIN_GATHERDIV2DI:
39497 icode = CODE_FOR_avx2_gatherdiv2di;
39499 case IX86_BUILTIN_GATHERDIV4DI:
39500 icode = CODE_FOR_avx2_gatherdiv4di;
39502 case IX86_BUILTIN_GATHERSIV4SI:
39503 icode = CODE_FOR_avx2_gathersiv4si;
39505 case IX86_BUILTIN_GATHERSIV8SI:
39506 icode = CODE_FOR_avx2_gathersiv8si;
39508 case IX86_BUILTIN_GATHERDIV4SI:
39509 icode = CODE_FOR_avx2_gatherdiv4si;
39511 case IX86_BUILTIN_GATHERDIV8SI:
39512 icode = CODE_FOR_avx2_gatherdiv8si;
39514 case IX86_BUILTIN_GATHERALTSIV4DF:
39515 icode = CODE_FOR_avx2_gathersiv4df;
39517 case IX86_BUILTIN_GATHERALTDIV8SF:
39518 icode = CODE_FOR_avx2_gatherdiv8sf;
39520 case IX86_BUILTIN_GATHERALTSIV4DI:
39521 icode = CODE_FOR_avx2_gathersiv4di;
39523 case IX86_BUILTIN_GATHERALTDIV8SI:
39524 icode = CODE_FOR_avx2_gatherdiv8si;
39526 case IX86_BUILTIN_GATHER3SIV16SF:
39527 icode = CODE_FOR_avx512f_gathersiv16sf;
39529 case IX86_BUILTIN_GATHER3SIV8DF:
39530 icode = CODE_FOR_avx512f_gathersiv8df;
39532 case IX86_BUILTIN_GATHER3DIV16SF:
39533 icode = CODE_FOR_avx512f_gatherdiv16sf;
39535 case IX86_BUILTIN_GATHER3DIV8DF:
39536 icode = CODE_FOR_avx512f_gatherdiv8df;
39538 case IX86_BUILTIN_GATHER3SIV16SI:
39539 icode = CODE_FOR_avx512f_gathersiv16si;
39541 case IX86_BUILTIN_GATHER3SIV8DI:
39542 icode = CODE_FOR_avx512f_gathersiv8di;
39544 case IX86_BUILTIN_GATHER3DIV16SI:
39545 icode = CODE_FOR_avx512f_gatherdiv16si;
39547 case IX86_BUILTIN_GATHER3DIV8DI:
39548 icode = CODE_FOR_avx512f_gatherdiv8di;
39550 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39551 icode = CODE_FOR_avx512f_gathersiv8df;
39553 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39554 icode = CODE_FOR_avx512f_gatherdiv16sf;
39556 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39557 icode = CODE_FOR_avx512f_gathersiv8di;
39559 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39560 icode = CODE_FOR_avx512f_gatherdiv16si;
39562 case IX86_BUILTIN_GATHER3SIV2DF:
39563 icode = CODE_FOR_avx512vl_gathersiv2df;
39565 case IX86_BUILTIN_GATHER3SIV4DF:
39566 icode = CODE_FOR_avx512vl_gathersiv4df;
39568 case IX86_BUILTIN_GATHER3DIV2DF:
39569 icode = CODE_FOR_avx512vl_gatherdiv2df;
39571 case IX86_BUILTIN_GATHER3DIV4DF:
39572 icode = CODE_FOR_avx512vl_gatherdiv4df;
39574 case IX86_BUILTIN_GATHER3SIV4SF:
39575 icode = CODE_FOR_avx512vl_gathersiv4sf;
39577 case IX86_BUILTIN_GATHER3SIV8SF:
39578 icode = CODE_FOR_avx512vl_gathersiv8sf;
39580 case IX86_BUILTIN_GATHER3DIV4SF:
39581 icode = CODE_FOR_avx512vl_gatherdiv4sf;
39583 case IX86_BUILTIN_GATHER3DIV8SF:
39584 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39586 case IX86_BUILTIN_GATHER3SIV2DI:
39587 icode = CODE_FOR_avx512vl_gathersiv2di;
39589 case IX86_BUILTIN_GATHER3SIV4DI:
39590 icode = CODE_FOR_avx512vl_gathersiv4di;
39592 case IX86_BUILTIN_GATHER3DIV2DI:
39593 icode = CODE_FOR_avx512vl_gatherdiv2di;
39595 case IX86_BUILTIN_GATHER3DIV4DI:
39596 icode = CODE_FOR_avx512vl_gatherdiv4di;
39598 case IX86_BUILTIN_GATHER3SIV4SI:
39599 icode = CODE_FOR_avx512vl_gathersiv4si;
39601 case IX86_BUILTIN_GATHER3SIV8SI:
39602 icode = CODE_FOR_avx512vl_gathersiv8si;
39604 case IX86_BUILTIN_GATHER3DIV4SI:
39605 icode = CODE_FOR_avx512vl_gatherdiv4si;
39607 case IX86_BUILTIN_GATHER3DIV8SI:
39608 icode = CODE_FOR_avx512vl_gatherdiv8si;
39610 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39611 icode = CODE_FOR_avx512vl_gathersiv4df;
39613 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39614 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39616 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39617 icode = CODE_FOR_avx512vl_gathersiv4di;
39619 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39620 icode = CODE_FOR_avx512vl_gatherdiv8si;
39622 case IX86_BUILTIN_SCATTERSIV16SF:
39623 icode = CODE_FOR_avx512f_scattersiv16sf;
39625 case IX86_BUILTIN_SCATTERSIV8DF:
39626 icode = CODE_FOR_avx512f_scattersiv8df;
39628 case IX86_BUILTIN_SCATTERDIV16SF:
39629 icode = CODE_FOR_avx512f_scatterdiv16sf;
39631 case IX86_BUILTIN_SCATTERDIV8DF:
39632 icode = CODE_FOR_avx512f_scatterdiv8df;
39634 case IX86_BUILTIN_SCATTERSIV16SI:
39635 icode = CODE_FOR_avx512f_scattersiv16si;
39637 case IX86_BUILTIN_SCATTERSIV8DI:
39638 icode = CODE_FOR_avx512f_scattersiv8di;
39640 case IX86_BUILTIN_SCATTERDIV16SI:
39641 icode = CODE_FOR_avx512f_scatterdiv16si;
39643 case IX86_BUILTIN_SCATTERDIV8DI:
39644 icode = CODE_FOR_avx512f_scatterdiv8di;
39646 case IX86_BUILTIN_SCATTERSIV8SF:
39647 icode = CODE_FOR_avx512vl_scattersiv8sf;
39649 case IX86_BUILTIN_SCATTERSIV4SF:
39650 icode = CODE_FOR_avx512vl_scattersiv4sf;
39652 case IX86_BUILTIN_SCATTERSIV4DF:
39653 icode = CODE_FOR_avx512vl_scattersiv4df;
39655 case IX86_BUILTIN_SCATTERSIV2DF:
39656 icode = CODE_FOR_avx512vl_scattersiv2df;
39658 case IX86_BUILTIN_SCATTERDIV8SF:
39659 icode = CODE_FOR_avx512vl_scatterdiv8sf;
39661 case IX86_BUILTIN_SCATTERDIV4SF:
39662 icode = CODE_FOR_avx512vl_scatterdiv4sf;
39664 case IX86_BUILTIN_SCATTERDIV4DF:
39665 icode = CODE_FOR_avx512vl_scatterdiv4df;
39667 case IX86_BUILTIN_SCATTERDIV2DF:
39668 icode = CODE_FOR_avx512vl_scatterdiv2df;
39670 case IX86_BUILTIN_SCATTERSIV8SI:
39671 icode = CODE_FOR_avx512vl_scattersiv8si;
39673 case IX86_BUILTIN_SCATTERSIV4SI:
39674 icode = CODE_FOR_avx512vl_scattersiv4si;
39676 case IX86_BUILTIN_SCATTERSIV4DI:
39677 icode = CODE_FOR_avx512vl_scattersiv4di;
39679 case IX86_BUILTIN_SCATTERSIV2DI:
39680 icode = CODE_FOR_avx512vl_scattersiv2di;
39682 case IX86_BUILTIN_SCATTERDIV8SI:
39683 icode = CODE_FOR_avx512vl_scatterdiv8si;
39685 case IX86_BUILTIN_SCATTERDIV4SI:
39686 icode = CODE_FOR_avx512vl_scatterdiv4si;
39688 case IX86_BUILTIN_SCATTERDIV4DI:
39689 icode = CODE_FOR_avx512vl_scatterdiv4di;
39691 case IX86_BUILTIN_SCATTERDIV2DI:
39692 icode = CODE_FOR_avx512vl_scatterdiv2di;
39694 case IX86_BUILTIN_GATHERPFDPD:
39695 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
39696 goto vec_prefetch_gen;
39697 case IX86_BUILTIN_GATHERPFDPS:
39698 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
39699 goto vec_prefetch_gen;
39700 case IX86_BUILTIN_GATHERPFQPD:
39701 icode = CODE_FOR_avx512pf_gatherpfv8didf;
39702 goto vec_prefetch_gen;
39703 case IX86_BUILTIN_GATHERPFQPS:
39704 icode = CODE_FOR_avx512pf_gatherpfv8disf;
39705 goto vec_prefetch_gen;
39706 case IX86_BUILTIN_SCATTERPFDPD:
39707 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
39708 goto vec_prefetch_gen;
39709 case IX86_BUILTIN_SCATTERPFDPS:
39710 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
39711 goto vec_prefetch_gen;
39712 case IX86_BUILTIN_SCATTERPFQPD:
39713 icode = CODE_FOR_avx512pf_scatterpfv8didf;
39714 goto vec_prefetch_gen;
39715 case IX86_BUILTIN_SCATTERPFQPS:
39716 icode = CODE_FOR_avx512pf_scatterpfv8disf;
39717 goto vec_prefetch_gen;
39721 rtx (*gen) (rtx, rtx);
39723 arg0 = CALL_EXPR_ARG (exp, 0);
39724 arg1 = CALL_EXPR_ARG (exp, 1);
39725 arg2 = CALL_EXPR_ARG (exp, 2);
39726 arg3 = CALL_EXPR_ARG (exp, 3);
39727 arg4 = CALL_EXPR_ARG (exp, 4);
39728 op0 = expand_normal (arg0);
39729 op1 = expand_normal (arg1);
39730 op2 = expand_normal (arg2);
39731 op3 = expand_normal (arg3);
39732 op4 = expand_normal (arg4);
39733 /* Note the arg order is different from the operand order. */
39734 mode0 = insn_data[icode].operand[1].mode;
39735 mode2 = insn_data[icode].operand[3].mode;
39736 mode3 = insn_data[icode].operand[4].mode;
39737 mode4 = insn_data[icode].operand[5].mode;
39739 if (target == NULL_RTX
39740 || GET_MODE (target) != insn_data[icode].operand[0].mode
39741 || !insn_data[icode].operand[0].predicate (target,
39742 GET_MODE (target)))
39743 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
39745 subtarget = target;
39749 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39750 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39751 half = gen_reg_rtx (V8SImode);
39752 if (!nonimmediate_operand (op2, V16SImode))
39753 op2 = copy_to_mode_reg (V16SImode, op2);
39754 emit_insn (gen_vec_extract_lo_v16si (half, op2));
39757 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39758 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39759 case IX86_BUILTIN_GATHERALTSIV4DF:
39760 case IX86_BUILTIN_GATHERALTSIV4DI:
39761 half = gen_reg_rtx (V4SImode);
39762 if (!nonimmediate_operand (op2, V8SImode))
39763 op2 = copy_to_mode_reg (V8SImode, op2);
39764 emit_insn (gen_vec_extract_lo_v8si (half, op2));
39767 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39768 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39769 half = gen_reg_rtx (mode0);
39770 if (mode0 == V8SFmode)
39771 gen = gen_vec_extract_lo_v16sf;
39773 gen = gen_vec_extract_lo_v16si;
39774 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39775 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39776 emit_insn (gen (half, op0));
39778 if (GET_MODE (op3) != VOIDmode)
39780 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39781 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39782 emit_insn (gen (half, op3));
39786 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39787 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39788 case IX86_BUILTIN_GATHERALTDIV8SF:
39789 case IX86_BUILTIN_GATHERALTDIV8SI:
39790 half = gen_reg_rtx (mode0);
39791 if (mode0 == V4SFmode)
39792 gen = gen_vec_extract_lo_v8sf;
39794 gen = gen_vec_extract_lo_v8si;
39795 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39796 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39797 emit_insn (gen (half, op0));
39799 if (GET_MODE (op3) != VOIDmode)
39801 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39802 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39803 emit_insn (gen (half, op3));
39811 /* Force memory operand only with base register here. But we
39812 don't want to do it on memory operand for other builtin
39814 op1 = ix86_zero_extend_to_Pmode (op1);
39816 if (!insn_data[icode].operand[1].predicate (op0, mode0))
39817 op0 = copy_to_mode_reg (mode0, op0);
39818 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
39819 op1 = copy_to_mode_reg (Pmode, op1);
39820 if (!insn_data[icode].operand[3].predicate (op2, mode2))
39821 op2 = copy_to_mode_reg (mode2, op2);
39822 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
39824 if (!insn_data[icode].operand[4].predicate (op3, mode3))
39825 op3 = copy_to_mode_reg (mode3, op3);
39829 op3 = copy_to_reg (op3);
39830 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
39832 if (!insn_data[icode].operand[5].predicate (op4, mode4))
39834 error ("the last argument must be scale 1, 2, 4, 8");
39838 /* Optimize. If mask is known to have all high bits set,
39839 replace op0 with pc_rtx to signal that the instruction
39840 overwrites the whole destination and doesn't use its
39841 previous contents. */
39844 if (TREE_CODE (arg3) == INTEGER_CST)
39846 if (integer_all_onesp (arg3))
39849 else if (TREE_CODE (arg3) == VECTOR_CST)
39851 unsigned int negative = 0;
39852 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
39854 tree cst = VECTOR_CST_ELT (arg3, i);
39855 if (TREE_CODE (cst) == INTEGER_CST
39856 && tree_int_cst_sign_bit (cst))
39858 else if (TREE_CODE (cst) == REAL_CST
39859 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
39862 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
39865 else if (TREE_CODE (arg3) == SSA_NAME
39866 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
39868 /* Recognize also when mask is like:
39869 __v2df src = _mm_setzero_pd ();
39870 __v2df mask = _mm_cmpeq_pd (src, src);
39872 __v8sf src = _mm256_setzero_ps ();
39873 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
39874 as that is a cheaper way to load all ones into
39875 a register than having to load a constant from
39877 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
39878 if (is_gimple_call (def_stmt))
39880 tree fndecl = gimple_call_fndecl (def_stmt);
39882 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
39883 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
39885 case IX86_BUILTIN_CMPPD:
39886 case IX86_BUILTIN_CMPPS:
39887 case IX86_BUILTIN_CMPPD256:
39888 case IX86_BUILTIN_CMPPS256:
39889 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
39892 case IX86_BUILTIN_CMPEQPD:
39893 case IX86_BUILTIN_CMPEQPS:
39894 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
39895 && initializer_zerop (gimple_call_arg (def_stmt,
39906 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
39913 case IX86_BUILTIN_GATHER3DIV16SF:
39914 if (target == NULL_RTX)
39915 target = gen_reg_rtx (V8SFmode);
39916 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
39918 case IX86_BUILTIN_GATHER3DIV16SI:
39919 if (target == NULL_RTX)
39920 target = gen_reg_rtx (V8SImode);
39921 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
39923 case IX86_BUILTIN_GATHER3DIV8SF:
39924 case IX86_BUILTIN_GATHERDIV8SF:
39925 if (target == NULL_RTX)
39926 target = gen_reg_rtx (V4SFmode);
39927 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
39929 case IX86_BUILTIN_GATHER3DIV8SI:
39930 case IX86_BUILTIN_GATHERDIV8SI:
39931 if (target == NULL_RTX)
39932 target = gen_reg_rtx (V4SImode);
39933 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
39936 target = subtarget;
39942 arg0 = CALL_EXPR_ARG (exp, 0);
39943 arg1 = CALL_EXPR_ARG (exp, 1);
39944 arg2 = CALL_EXPR_ARG (exp, 2);
39945 arg3 = CALL_EXPR_ARG (exp, 3);
39946 arg4 = CALL_EXPR_ARG (exp, 4);
39947 op0 = expand_normal (arg0);
39948 op1 = expand_normal (arg1);
39949 op2 = expand_normal (arg2);
39950 op3 = expand_normal (arg3);
39951 op4 = expand_normal (arg4);
39952 mode1 = insn_data[icode].operand[1].mode;
39953 mode2 = insn_data[icode].operand[2].mode;
39954 mode3 = insn_data[icode].operand[3].mode;
39955 mode4 = insn_data[icode].operand[4].mode;
39957 /* Force memory operand only with base register here. But we
39958 don't want to do it on memory operand for other builtin
39960 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
39962 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39963 op0 = copy_to_mode_reg (Pmode, op0);
39965 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
39967 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39968 op1 = copy_to_mode_reg (mode1, op1);
39972 op1 = copy_to_reg (op1);
39973 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
39976 if (!insn_data[icode].operand[2].predicate (op2, mode2))
39977 op2 = copy_to_mode_reg (mode2, op2);
39979 if (!insn_data[icode].operand[3].predicate (op3, mode3))
39980 op3 = copy_to_mode_reg (mode3, op3);
39982 if (!insn_data[icode].operand[4].predicate (op4, mode4))
39984 error ("the last argument must be scale 1, 2, 4, 8");
39988 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
39996 arg0 = CALL_EXPR_ARG (exp, 0);
39997 arg1 = CALL_EXPR_ARG (exp, 1);
39998 arg2 = CALL_EXPR_ARG (exp, 2);
39999 arg3 = CALL_EXPR_ARG (exp, 3);
40000 arg4 = CALL_EXPR_ARG (exp, 4);
40001 op0 = expand_normal (arg0);
40002 op1 = expand_normal (arg1);
40003 op2 = expand_normal (arg2);
40004 op3 = expand_normal (arg3);
40005 op4 = expand_normal (arg4);
40006 mode0 = insn_data[icode].operand[0].mode;
40007 mode1 = insn_data[icode].operand[1].mode;
40008 mode3 = insn_data[icode].operand[3].mode;
40009 mode4 = insn_data[icode].operand[4].mode;
40011 if (GET_MODE (op0) == mode0
40012 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
40014 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40015 op0 = copy_to_mode_reg (mode0, op0);
40017 else if (op0 != constm1_rtx)
40019 op0 = copy_to_reg (op0);
40020 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
40023 if (!insn_data[icode].operand[1].predicate (op1, mode1))
40024 op1 = copy_to_mode_reg (mode1, op1);
40026 /* Force memory operand only with base register here. But we
40027 don't want to do it on memory operand for other builtin
40029 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
40031 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
40032 op2 = copy_to_mode_reg (Pmode, op2);
40034 if (!insn_data[icode].operand[3].predicate (op3, mode3))
40036 error ("the forth argument must be scale 1, 2, 4, 8");
40040 if (!insn_data[icode].operand[4].predicate (op4, mode4))
40042 error ("incorrect hint operand");
40046 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
40054 case IX86_BUILTIN_XABORT:
40055 icode = CODE_FOR_xabort;
40056 arg0 = CALL_EXPR_ARG (exp, 0);
40057 op0 = expand_normal (arg0);
40058 mode0 = insn_data[icode].operand[0].mode;
40059 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40061 error ("the xabort's argument must be an 8-bit immediate");
40064 emit_insn (gen_xabort (op0));
40071 for (i = 0, d = bdesc_special_args;
40072 i < ARRAY_SIZE (bdesc_special_args);
40074 if (d->code == fcode)
40075 return ix86_expand_special_args_builtin (d, exp, target);
40077 for (i = 0, d = bdesc_args;
40078 i < ARRAY_SIZE (bdesc_args);
40080 if (d->code == fcode)
40083 case IX86_BUILTIN_FABSQ:
40084 case IX86_BUILTIN_COPYSIGNQ:
40086 /* Emit a normal call if SSE isn't available. */
40087 return expand_call (exp, target, ignore);
40089 return ix86_expand_args_builtin (d, exp, target);
40092 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
40093 if (d->code == fcode)
40094 return ix86_expand_sse_comi (d, exp, target);
40096 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
40097 if (d->code == fcode)
40098 return ix86_expand_round_builtin (d, exp, target);
40100 for (i = 0, d = bdesc_pcmpestr;
40101 i < ARRAY_SIZE (bdesc_pcmpestr);
40103 if (d->code == fcode)
40104 return ix86_expand_sse_pcmpestr (d, exp, target);
40106 for (i = 0, d = bdesc_pcmpistr;
40107 i < ARRAY_SIZE (bdesc_pcmpistr);
40109 if (d->code == fcode)
40110 return ix86_expand_sse_pcmpistr (d, exp, target);
40112 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
40113 if (d->code == fcode)
40114 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
40115 (enum ix86_builtin_func_type)
40116 d->flag, d->comparison);
40118 gcc_unreachable ();
40121 /* This returns the target-specific builtin with code CODE if
40122 current_function_decl has visibility on this builtin, which is checked
40123 using isa flags. Returns NULL_TREE otherwise. */
40125 static tree ix86_get_builtin (enum ix86_builtins code)
/* Resolve the target-option node in effect for the current function,
   falling back to the command-line default options when the function
   carries no target attribute of its own.
   NOTE(review): this excerpt elides some lines (e.g. the trailing
   NULL_TREE return) — confirm against the full file.  */
40127 struct cl_target_option *opts;
40128 tree target_tree = NULL_TREE;
40130 /* Determine the isa flags of current_function_decl.  */
40132 if (current_function_decl)
40133 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
40135 if (target_tree == NULL)
40136 target_tree = target_option_default_node;
40138 opts = TREE_TARGET_OPTION (target_tree);
/* The builtin is visible only if one of the ISA flag bits it requires
   is enabled in the resolved options.  */
40140 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
40141 return ix86_builtin_decl (code, true);
40146 /* Return function decl for target specific builtin
40147 for given MPX builtin passed in FCODE. */
40149 ix86_builtin_mpx_function (unsigned fcode)
/* Translate a generic Pointer Bounds Checker builtin code FCODE into
   the corresponding x86 MPX builtin decl from ix86_builtins[].
   NOTE(review): the switch header, braces and default label appear to
   be elided from this excerpt.  */
40153 case BUILT_IN_CHKP_BNDMK:
40154 return ix86_builtins[IX86_BUILTIN_BNDMK];
40156 case BUILT_IN_CHKP_BNDSTX:
40157 return ix86_builtins[IX86_BUILTIN_BNDSTX];
40159 case BUILT_IN_CHKP_BNDLDX:
40160 return ix86_builtins[IX86_BUILTIN_BNDLDX];
40162 case BUILT_IN_CHKP_BNDCL:
40163 return ix86_builtins[IX86_BUILTIN_BNDCL];
40165 case BUILT_IN_CHKP_BNDCU:
40166 return ix86_builtins[IX86_BUILTIN_BNDCU];
40168 case BUILT_IN_CHKP_BNDRET:
40169 return ix86_builtins[IX86_BUILTIN_BNDRET];
40171 case BUILT_IN_CHKP_INTERSECT:
40172 return ix86_builtins[IX86_BUILTIN_BNDINT];
40174 case BUILT_IN_CHKP_NARROW:
40175 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
40177 case BUILT_IN_CHKP_SIZEOF:
40178 return ix86_builtins[IX86_BUILTIN_SIZEOF];
40180 case BUILT_IN_CHKP_EXTRACT_LOWER:
40181 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
40183 case BUILT_IN_CHKP_EXTRACT_UPPER:
40184 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
/* Any CHKP code without an MPX counterpart is a front-end bug.  */
40190 gcc_unreachable ();
40193 /* Helper function for ix86_load_bounds and ix86_store_bounds.
40195 Return an address to be used to load/store bounds for pointer
40198 SLOT_NO is an integer constant holding number of a target
40199 dependent special slot to be used in case SLOT is not a memory.
40201 SPECIAL_BASE is a pointer to be used as a base of fake address
40202 to access special slots in Bounds Table. SPECIAL_BASE[-1],
40203 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
40206 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
/* Compute the address used as the Bounds Table key for argument SLOT
   (see the block comment above for the SLOT / SLOT_NO / SPECIAL_BASE
   contract).  NOTE(review): the declaration of the local ADDR and the
   final return appear to be elided from this excerpt.  */
40210 /* NULL slot means we pass bounds for pointer not passed to the
40211 function at all. Register slot means we pass pointer in a
40212 register. In both these cases bounds are passed via Bounds
40213 Table. Since we do not have actual pointer stored in memory,
40214 we have to use fake addresses to access Bounds Table. We
40215 start with (special_base - sizeof (void*)) and decrease this
40216 address by pointer size to get addresses for other slots. */
40217 if (!slot || REG_P (slot))
40219 gcc_assert (CONST_INT_P (slot_no));
/* Fake address: SPECIAL_BASE[-(slot_no + 1)] in pointer-sized units.  */
40220 addr = plus_constant (Pmode, special_base,
40221 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
40223 /* If pointer is passed in a memory then its address is used to
40224 access Bounds Table. */
40225 else if (MEM_P (slot))
40227 addr = XEXP (slot, 0);
/* The bndldx/bndstx patterns want a register base.  */
40228 if (!register_operand (addr, Pmode))
40229 addr = copy_addr_to_reg (addr);
/* SLOT must be NULL, a REG or a MEM — anything else is a caller bug.  */
40232 gcc_unreachable ();
40237 /* Expand pass uses this hook to load bounds for function parameter
40238 PTR passed in SLOT in case its bounds are not passed in a register.
40240 If SLOT is a memory, then bounds are loaded as for regular pointer
40241 loaded from memory. PTR may be NULL in case SLOT is a memory.
40242 In such case value of PTR (if required) may be loaded from SLOT.
40244 If SLOT is NULL or a register then SLOT_NO is an integer constant
40245 holding number of the target dependent special slot which should be
40246 used to obtain bounds.
40248 Return loaded bounds. */
40251 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
/* Load MPX bounds for parameter PTR passed in SLOT via a bndldx from
   the Bounds Table (see the block comment above for the contract).
   NOTE(review): the final return of REG appears to be elided from
   this excerpt.  */
40253 rtx reg = gen_reg_rtx (BNDmode);
40256 /* Get address to be used to access Bounds Table. Special slots start
40257 at the location of return address of the current function. */
40258 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
40260 /* Load pointer value from a memory if we don't have it. */
40263 gcc_assert (MEM_P (slot));
40264 ptr = copy_addr_to_reg (slot);
/* Pick the 64- or 32-bit bound-load pattern to match BNDmode.  */
40267 emit_insn (BNDmode == BND64mode
40268 ? gen_bnd64_ldx (reg, addr, ptr)
40269 : gen_bnd32_ldx (reg, addr, ptr));
40274 /* Expand pass uses this hook to store BOUNDS for call argument PTR
40275 passed in SLOT in case BOUNDS are not passed in a register.
40277 If SLOT is a memory, then BOUNDS are stored as for regular pointer
40278 stored in memory. PTR may be NULL in case SLOT is a memory.
40279 In such case value of PTR (if required) may be loaded from SLOT.
40281 If SLOT is NULL or a register then SLOT_NO is an integer constant
40282 holding number of the target dependent special slot which should be
40283 used to store BOUNDS. */
40286 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
/* Store BOUNDS for call argument PTR passed in SLOT via a bndstx into
   the Bounds Table (see the block comment above for the contract).  */
40290 /* Get address to be used to access Bounds Table. Special slots start
40291 at the location of return address of a called function. */
40292 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
40294 /* Load pointer value from a memory if we don't have it. */
40297 gcc_assert (MEM_P (slot));
40298 ptr = copy_addr_to_reg (slot);
/* bndstx takes the bounds in a bound register.  */
40301 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
40302 if (!register_operand (bounds, BNDmode))
40303 bounds = copy_to_mode_reg (BNDmode, bounds);
/* Pick the 64- or 32-bit bound-store pattern to match BNDmode.  */
40305 emit_insn (BNDmode == BND64mode
40306 ? gen_bnd64_stx (addr, ptr, bounds)
40307 : gen_bnd32_stx (addr, ptr, bounds));
40310 /* Load and return bounds returned by function in SLOT. */
40313 ix86_load_returned_bounds (rtx slot)
/* Copy the bounds returned in hard register SLOT into a fresh BNDmode
   pseudo and return it.  NOTE(review): the RES declaration and final
   return appear to be elided from this excerpt.  */
40317 gcc_assert (REG_P (slot));
40318 res = gen_reg_rtx (BNDmode);
40319 emit_move_insn (res, slot);
40324 /* Store BOUNDS returned by function into SLOT. */
40327 ix86_store_returned_bounds (rtx slot, rtx bounds)
/* Move BOUNDS into the hard return-bounds register SLOT.  */
40329 gcc_assert (REG_P (slot));
40330 emit_move_insn (slot, bounds);
40333 /* Returns a function decl for a vectorized version of the builtin function
40334 with builtin function code FN and the result vector type TYPE, or NULL_TREE
40335 if it is not available. */
40338 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
/* Return the x86 builtin decl that vectorizes scalar builtin FNDECL
   for the given output/input vector types, or NULL_TREE if there is
   none (see the block comment above).  Dispatch is purely on the
   scalar element mode and the vector subpart counts.
   NOTE(review): this excerpt elides braces, breaks and some case
   labels — confirm structure against the full file.  */
40341 machine_mode in_mode, out_mode;
40343 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
/* Only normal (non-md, non-frontend) builtins on vector types are
   candidates.  */
40345 if (TREE_CODE (type_out) != VECTOR_TYPE
40346 || TREE_CODE (type_in) != VECTOR_TYPE
40347 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
40350 out_mode = TYPE_MODE (TREE_TYPE (type_out));
40351 out_n = TYPE_VECTOR_SUBPARTS (type_out);
40352 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40353 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40357 case BUILT_IN_SQRT:
40358 if (out_mode == DFmode && in_mode == DFmode)
40360 if (out_n == 2 && in_n == 2)
40361 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
40362 else if (out_n == 4 && in_n == 4)
40363 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
40364 else if (out_n == 8 && in_n == 8)
40365 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
40369 case BUILT_IN_EXP2F:
40370 if (out_mode == SFmode && in_mode == SFmode)
40372 if (out_n == 16 && in_n == 16)
40373 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
40377 case BUILT_IN_SQRTF:
40378 if (out_mode == SFmode && in_mode == SFmode)
40380 if (out_n == 4 && in_n == 4)
40381 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
40382 else if (out_n == 8 && in_n == 8)
40383 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
40384 else if (out_n == 16 && in_n == 16)
40385 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
/* floor/ceil/round families below require SSE4.1 ROUND and are only
   safe when FP traps are off, since they flush denormals silently.  */
40389 case BUILT_IN_IFLOOR:
40390 case BUILT_IN_LFLOOR:
40391 case BUILT_IN_LLFLOOR:
40392 /* The round insn does not trap on denormals. */
40393 if (flag_trapping_math || !TARGET_ROUND)
40396 if (out_mode == SImode && in_mode == DFmode)
40398 if (out_n == 4 && in_n == 2)
40399 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
40400 else if (out_n == 8 && in_n == 4)
40401 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
40402 else if (out_n == 16 && in_n == 8)
40403 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
40407 case BUILT_IN_IFLOORF:
40408 case BUILT_IN_LFLOORF:
40409 case BUILT_IN_LLFLOORF:
40410 /* The round insn does not trap on denormals. */
40411 if (flag_trapping_math || !TARGET_ROUND)
40414 if (out_mode == SImode && in_mode == SFmode)
40416 if (out_n == 4 && in_n == 4)
40417 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
40418 else if (out_n == 8 && in_n == 8)
40419 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
40423 case BUILT_IN_ICEIL:
40424 case BUILT_IN_LCEIL:
40425 case BUILT_IN_LLCEIL:
40426 /* The round insn does not trap on denormals. */
40427 if (flag_trapping_math || !TARGET_ROUND)
40430 if (out_mode == SImode && in_mode == DFmode)
40432 if (out_n == 4 && in_n == 2)
40433 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
40434 else if (out_n == 8 && in_n == 4)
40435 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
40436 else if (out_n == 16 && in_n == 8)
40437 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
40441 case BUILT_IN_ICEILF:
40442 case BUILT_IN_LCEILF:
40443 case BUILT_IN_LLCEILF:
40444 /* The round insn does not trap on denormals. */
40445 if (flag_trapping_math || !TARGET_ROUND)
40448 if (out_mode == SImode && in_mode == SFmode)
40450 if (out_n == 4 && in_n == 4)
40451 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
40452 else if (out_n == 8 && in_n == 8)
40453 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
/* lrint uses cvtpd2dq/cvtps2dq which honor the current rounding mode,
   so no TARGET_ROUND / trapping-math guard is needed here.  */
40457 case BUILT_IN_IRINT:
40458 case BUILT_IN_LRINT:
40459 case BUILT_IN_LLRINT:
40460 if (out_mode == SImode && in_mode == DFmode)
40462 if (out_n == 4 && in_n == 2)
40463 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
40464 else if (out_n == 8 && in_n == 4)
40465 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
40469 case BUILT_IN_IRINTF:
40470 case BUILT_IN_LRINTF:
40471 case BUILT_IN_LLRINTF:
40472 if (out_mode == SImode && in_mode == SFmode)
40474 if (out_n == 4 && in_n == 4)
40475 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
40476 else if (out_n == 8 && in_n == 8)
40477 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
40481 case BUILT_IN_IROUND:
40482 case BUILT_IN_LROUND:
40483 case BUILT_IN_LLROUND:
40484 /* The round insn does not trap on denormals. */
40485 if (flag_trapping_math || !TARGET_ROUND)
40488 if (out_mode == SImode && in_mode == DFmode)
40490 if (out_n == 4 && in_n == 2)
40491 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
40492 else if (out_n == 8 && in_n == 4)
40493 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
40494 else if (out_n == 16 && in_n == 8)
40495 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
40499 case BUILT_IN_IROUNDF:
40500 case BUILT_IN_LROUNDF:
40501 case BUILT_IN_LLROUNDF:
40502 /* The round insn does not trap on denormals. */
40503 if (flag_trapping_math || !TARGET_ROUND)
40506 if (out_mode == SImode && in_mode == SFmode)
40508 if (out_n == 4 && in_n == 4)
40509 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
40510 else if (out_n == 8 && in_n == 8)
40511 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
40515 case BUILT_IN_COPYSIGN:
40516 if (out_mode == DFmode && in_mode == DFmode)
40518 if (out_n == 2 && in_n == 2)
40519 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
40520 else if (out_n == 4 && in_n == 4)
40521 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
40522 else if (out_n == 8 && in_n == 8)
40523 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
40527 case BUILT_IN_COPYSIGNF:
40528 if (out_mode == SFmode && in_mode == SFmode)
40530 if (out_n == 4 && in_n == 4)
40531 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
40532 else if (out_n == 8 && in_n == 8)
40533 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
40534 else if (out_n == 16 && in_n == 16)
40535 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
40539 case BUILT_IN_FLOOR:
40540 /* The round insn does not trap on denormals. */
40541 if (flag_trapping_math || !TARGET_ROUND)
40544 if (out_mode == DFmode && in_mode == DFmode)
40546 if (out_n == 2 && in_n == 2)
40547 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
40548 else if (out_n == 4 && in_n == 4)
40549 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
40553 case BUILT_IN_FLOORF:
40554 /* The round insn does not trap on denormals. */
40555 if (flag_trapping_math || !TARGET_ROUND)
40558 if (out_mode == SFmode && in_mode == SFmode)
40560 if (out_n == 4 && in_n == 4)
40561 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
40562 else if (out_n == 8 && in_n == 8)
40563 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
40567 case BUILT_IN_CEIL:
40568 /* The round insn does not trap on denormals. */
40569 if (flag_trapping_math || !TARGET_ROUND)
40572 if (out_mode == DFmode && in_mode == DFmode)
40574 if (out_n == 2 && in_n == 2)
40575 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
40576 else if (out_n == 4 && in_n == 4)
40577 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
40581 case BUILT_IN_CEILF:
40582 /* The round insn does not trap on denormals. */
40583 if (flag_trapping_math || !TARGET_ROUND)
40586 if (out_mode == SFmode && in_mode == SFmode)
40588 if (out_n == 4 && in_n == 4)
40589 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
40590 else if (out_n == 8 && in_n == 8)
40591 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
40595 case BUILT_IN_TRUNC:
40596 /* The round insn does not trap on denormals. */
40597 if (flag_trapping_math || !TARGET_ROUND)
40600 if (out_mode == DFmode && in_mode == DFmode)
40602 if (out_n == 2 && in_n == 2)
40603 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
40604 else if (out_n == 4 && in_n == 4)
40605 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
40609 case BUILT_IN_TRUNCF:
40610 /* The round insn does not trap on denormals. */
40611 if (flag_trapping_math || !TARGET_ROUND)
40614 if (out_mode == SFmode && in_mode == SFmode)
40616 if (out_n == 4 && in_n == 4)
40617 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
40618 else if (out_n == 8 && in_n == 8)
40619 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
40623 case BUILT_IN_RINT:
40624 /* The round insn does not trap on denormals. */
40625 if (flag_trapping_math || !TARGET_ROUND)
40628 if (out_mode == DFmode && in_mode == DFmode)
40630 if (out_n == 2 && in_n == 2)
40631 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
40632 else if (out_n == 4 && in_n == 4)
40633 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
40637 case BUILT_IN_RINTF:
40638 /* The round insn does not trap on denormals. */
40639 if (flag_trapping_math || !TARGET_ROUND)
40642 if (out_mode == SFmode && in_mode == SFmode)
40644 if (out_n == 4 && in_n == 4)
40645 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
40646 else if (out_n == 8 && in_n == 8)
40647 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
40651 case BUILT_IN_ROUND:
40652 /* The round insn does not trap on denormals. */
40653 if (flag_trapping_math || !TARGET_ROUND)
40656 if (out_mode == DFmode && in_mode == DFmode)
40658 if (out_n == 2 && in_n == 2)
40659 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
40660 else if (out_n == 4 && in_n == 4)
40661 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
40665 case BUILT_IN_ROUNDF:
40666 /* The round insn does not trap on denormals. */
40667 if (flag_trapping_math || !TARGET_ROUND)
40670 if (out_mode == SFmode && in_mode == SFmode)
40672 if (out_n == 4 && in_n == 4)
40673 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
40674 else if (out_n == 8 && in_n == 8)
40675 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
/* NOTE(review): the case label for BUILT_IN_FMA appears to be elided
   before this DFmode fused-multiply-add block — confirm.  */
40680 if (out_mode == DFmode && in_mode == DFmode)
40682 if (out_n == 2 && in_n == 2)
40683 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
40684 if (out_n == 4 && in_n == 4)
40685 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
40689 case BUILT_IN_FMAF:
40690 if (out_mode == SFmode && in_mode == SFmode)
40692 if (out_n == 4 && in_n == 4)
40693 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
40694 if (out_n == 8 && in_n == 8)
40695 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
40703 /* Dispatch to a handler for a vectorization library. */
40704 if (ix86_veclib_handler)
40705 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
40711 /* Handler for an SVML-style interface to
40712 a library with vectorized intrinsics. */
40715 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
/* Build (and return) a FUNCTION_DECL for the SVML library routine that
   vectorizes scalar builtin FN with vector types TYPE_OUT/TYPE_IN, or
   NULL_TREE when SVML has no match.
   NOTE(review): several lines (NAME buffer declaration, early NULL
   returns, final return) appear to be elided from this excerpt.  */
40718 tree fntype, new_fndecl, args;
40721 machine_mode el_mode, in_mode;
40724 /* The SVML is suitable for unsafe math only. */
40725 if (!flag_unsafe_math_optimizations)
40728 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40729 n = TYPE_VECTOR_SUBPARTS (type_out);
40730 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40731 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* SVML entry points require matching element mode and lane count on
   input and output.  */
40732 if (el_mode != in_mode
/* Double-precision routines: only the 2-lane (128-bit) forms exist.  */
40740 case BUILT_IN_LOG10:
40742 case BUILT_IN_TANH:
40744 case BUILT_IN_ATAN:
40745 case BUILT_IN_ATAN2:
40746 case BUILT_IN_ATANH:
40747 case BUILT_IN_CBRT:
40748 case BUILT_IN_SINH:
40750 case BUILT_IN_ASINH:
40751 case BUILT_IN_ASIN:
40752 case BUILT_IN_COSH:
40754 case BUILT_IN_ACOSH:
40755 case BUILT_IN_ACOS:
40756 if (el_mode != DFmode || n != 2)
/* Single-precision routines: only the 4-lane (128-bit) forms exist.  */
40760 case BUILT_IN_EXPF:
40761 case BUILT_IN_LOGF:
40762 case BUILT_IN_LOG10F:
40763 case BUILT_IN_POWF:
40764 case BUILT_IN_TANHF:
40765 case BUILT_IN_TANF:
40766 case BUILT_IN_ATANF:
40767 case BUILT_IN_ATAN2F:
40768 case BUILT_IN_ATANHF:
40769 case BUILT_IN_CBRTF:
40770 case BUILT_IN_SINHF:
40771 case BUILT_IN_SINF:
40772 case BUILT_IN_ASINHF:
40773 case BUILT_IN_ASINF:
40774 case BUILT_IN_COSHF:
40775 case BUILT_IN_COSF:
40776 case BUILT_IN_ACOSHF:
40777 case BUILT_IN_ACOSF:
40778 if (el_mode != SFmode || n != 4)
/* Mangle the scalar builtin name into SVML style: "vmls"/"vmld"
   prefix (single/double), the builtin name with its "__builtin_"
   prefix (10 chars) dropped, and a trailing lane count.  log needs
   special-casing because SVML spells it "Ln".  */
40786 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40788 if (fn == BUILT_IN_LOGF)
40789 strcpy (name, "vmlsLn4");
40790 else if (fn == BUILT_IN_LOG)
40791 strcpy (name, "vmldLn2");
40794 sprintf (name, "vmls%s", bname+10);
40795 name[strlen (name)-1] = '4';
40798 sprintf (name, "vmld%s2", bname+10);
40800 /* Convert to uppercase. */
/* Count the scalar builtin's arguments to pick a unary or binary
   vector function type.  */
40804 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40806 args = TREE_CHAIN (args))
40810 fntype = build_function_type_list (type_out, type_in, NULL);
40812 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40814 /* Build a function declaration for the vectorized function. */
40815 new_fndecl = build_decl (BUILTINS_LOCATION,
40816 FUNCTION_DECL, get_identifier (name), fntype);
40817 TREE_PUBLIC (new_fndecl) = 1;
40818 DECL_EXTERNAL (new_fndecl) = 1;
/* Library math routines read no memory the compiler tracks and are
   pure in their arguments.  */
40819 DECL_IS_NOVOPS (new_fndecl) = 1;
40820 TREE_READONLY (new_fndecl) = 1;
40825 /* Handler for an ACML-style interface to
40826 a library with vectorized intrinsics. */
40829 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
/* Build (and return) a FUNCTION_DECL for the ACML library routine that
   vectorizes scalar builtin FN, or NULL_TREE when ACML has no match.
   NOTE(review): early NULL returns and the final return appear to be
   elided from this excerpt.  */
40831 char name[20] = "__vr.._";
40832 tree fntype, new_fndecl, args;
40835 machine_mode el_mode, in_mode;
40838 /* The ACML is 64bits only and suitable for unsafe math only as
40839 it does not correctly support parts of IEEE with the required
40840 precision such as denormals. */
40842 || !flag_unsafe_math_optimizations)
40845 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40846 n = TYPE_VECTOR_SUBPARTS (type_out);
40847 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40848 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Element mode and lane count must match between input and output.  */
40849 if (el_mode != in_mode
40859 case BUILT_IN_LOG2:
40860 case BUILT_IN_LOG10:
40863 if (el_mode != DFmode
40868 case BUILT_IN_SINF:
40869 case BUILT_IN_COSF:
40870 case BUILT_IN_EXPF:
40871 case BUILT_IN_POWF:
40872 case BUILT_IN_LOGF:
40873 case BUILT_IN_LOG2F:
40874 case BUILT_IN_LOG10F:
40877 if (el_mode != SFmode
/* Splice the scalar builtin name (minus its 10-char "__builtin_"
   prefix) into the "__vr.._" template after the 7-char prefix.  */
40886 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40887 sprintf (name + 7, "%s", bname+10);
/* Count the scalar builtin's arguments to pick a unary or binary
   vector function type.  */
40890 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40892 args = TREE_CHAIN (args))
40896 fntype = build_function_type_list (type_out, type_in, NULL);
40898 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
40900 /* Build a function declaration for the vectorized function. */
40901 new_fndecl = build_decl (BUILTINS_LOCATION,
40902 FUNCTION_DECL, get_identifier (name), fntype);
40903 TREE_PUBLIC (new_fndecl) = 1;
40904 DECL_EXTERNAL (new_fndecl) = 1;
/* Library math routines read no memory the compiler tracks and are
   pure in their arguments.  */
40905 DECL_IS_NOVOPS (new_fndecl) = 1;
40906 TREE_READONLY (new_fndecl) = 1;
40911 /* Returns a decl of a function that implements gather load with
40912 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
40913 Return NULL_TREE if it is not available. */
40916 ix86_vectorize_builtin_gather (const_tree mem_vectype,
40917 const_tree index_type, int scale)
/* Return the decl of the gather builtin matching memory vector type
   MEM_VECTYPE, INDEX_TYPE and SCALE, or NULL_TREE (see the block
   comment above).  AVX512VL/AVX512F variants are preferred when the
   corresponding ISA is enabled; otherwise the AVX2 forms are used.
   NOTE(review): the switch header, early NULL returns and some case
   labels appear to be elided from this excerpt.  */
40920 enum ix86_builtins code;
/* Only SImode/DImode integer (or pointer) indices are supported.  */
40925 if ((TREE_CODE (index_type) != INTEGER_TYPE
40926 && !POINTER_TYPE_P (index_type))
40927 || (TYPE_MODE (index_type) != SImode
40928 && TYPE_MODE (index_type) != DImode))
40931 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
40934 /* v*gather* insn sign extends index to pointer mode. */
40935 if (TYPE_PRECISION (index_type) < POINTER_SIZE
40936 && TYPE_UNSIGNED (index_type))
/* Scale must be 1, 2, 4 or 8, i.e. a power of two.  */
40941 || (scale & (scale - 1)) != 0)
/* SI selects the SIV (32-bit index) form, otherwise DIV (64-bit).  */
40944 si = TYPE_MODE (index_type) == SImode;
40945 switch (TYPE_MODE (mem_vectype))
40948 if (TARGET_AVX512VL)
40949 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
40951 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
40954 if (TARGET_AVX512VL)
40955 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
40957 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
40960 if (TARGET_AVX512VL)
40961 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
40963 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
40966 if (TARGET_AVX512VL)
40967 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
40969 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
40972 if (TARGET_AVX512VL)
40973 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
40975 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
40978 if (TARGET_AVX512VL)
40979 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
40981 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
40984 if (TARGET_AVX512VL)
40985 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
40987 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
40990 if (TARGET_AVX512VL)
40991 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
40993 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
/* 512-bit vector modes require full AVX512F.  */
40996 if (TARGET_AVX512F)
40997 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
41002 if (TARGET_AVX512F)
41003 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
41008 if (TARGET_AVX512F)
41009 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
41014 if (TARGET_AVX512F)
41015 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
41023 return ix86_get_builtin (code);
41026 /* Returns a code for a target-specific builtin that implements
41027 reciprocal of the function, or NULL_TREE if not available. */
41030 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
/* Return the builtin implementing the reciprocal approximation of FN
   (rsqrt for sqrt), or NULL_TREE.  MD_FN distinguishes machine-
   dependent builtin codes from normal builtin codes.  */
/* rsqrtps is an approximation: only usable under finite, non-trapping,
   unsafe math, with SSE math enabled and not when optimizing for
   size.  */
41032 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
41033 && flag_finite_math_only && !flag_trapping_math
41034 && flag_unsafe_math_optimizations))
41038 /* Machine dependent builtins. */
41041 /* Vectorized version of sqrt to rsqrt conversion. */
41042 case IX86_BUILTIN_SQRTPS_NR:
41043 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
41045 case IX86_BUILTIN_SQRTPS_NR256:
41046 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
41052 /* Normal builtins. */
41055 /* Sqrt to rsqrt conversion. */
41056 case BUILT_IN_SQRTF:
41057 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
41064 /* Helper for avx_vpermilps256_operand et al. This is also used by
41065 the expansion functions to turn the parallel back into a mask.
41066 The return value is 0 for no match and the imm8+1 for a match. */
41069 avx_vpermilp_parallel (rtx par, machine_mode mode)
/* Validate PAR as a vpermilps/vpermilpd-style permutation for MODE and
   reconstruct the immediate; returns imm8+1 on success, 0 on failure
   (see the block comment above).
   NOTE(review): the MASK declaration, switch header on MODE and some
   failure returns appear to be elided from this excerpt.  */
41071 unsigned i, nelt = GET_MODE_NUNITS (mode);
41073 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
41075 if (XVECLEN (par, 0) != (int) nelt)
41078 /* Validate that all of the elements are constants, and not totally
41079 out of range. Copy the data into an integral array to make the
41080 subsequent checks easier. */
41081 for (i = 0; i < nelt; ++i)
41083 rtx er = XVECEXP (par, 0, i);
41084 unsigned HOST_WIDE_INT ei;
41086 if (!CONST_INT_P (er))
41097 /* In the 512-bit DFmode case, we can only move elements within
41098 a 128-bit lane. First fill the second part of the mask,
41100 for (i = 4; i < 6; ++i)
41102 if (ipar[i] < 4 || ipar[i] >= 6)
41104 mask |= (ipar[i] - 4) << i;
41106 for (i = 6; i < 8; ++i)
41110 mask |= (ipar[i] - 6) << i;
41115 /* In the 256-bit DFmode case, we can only move elements within
41117 for (i = 0; i < 2; ++i)
41121 mask |= ipar[i] << i;
41123 for (i = 2; i < 4; ++i)
41127 mask |= (ipar[i] - 2) << i;
41132 /* In 512 bit SFmode case, permutation in the upper 256 bits
41133 must mirror the permutation in the lower 256-bits. */
41134 for (i = 0; i < 8; ++i)
41135 if (ipar[i] + 8 != ipar[i + 8])
41140 /* In 256 bit SFmode case, we have full freedom of
41141 movement within the low 128-bit lane, but the high 128-bit
41142 lane must mirror the exact same pattern. */
41143 for (i = 0; i < 4; ++i)
41144 if (ipar[i] + 4 != ipar[i + 4])
41151 /* In the 128-bit case, we've full freedom in the placement of
41152 the elements from the source operand. */
41153 for (i = 0; i < nelt; ++i)
41154 mask |= ipar[i] << (i * (nelt / 2));
/* Unhandled vector mode: caller passed something this helper does not
   support.  */
41158 gcc_unreachable ();
41161 /* Make sure success has a non-zero value by adding one. */
41165 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
41166 the expansion functions to turn the parallel back into a mask.
41167 The return value is 0 for no match and the imm8+1 for a match. */
41170 avx_vperm2f128_parallel (rtx par, machine_mode mode)
/* Validate PAR as a vperm2f128-style 128-bit-lane permutation for MODE
   and reconstruct the immediate; returns imm8+1 on success, 0 on
   failure (see the block comment above).
   NOTE(review): the MASK declaration and the failure returns appear
   to be elided from this excerpt.  */
41172 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
41174 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
41176 if (XVECLEN (par, 0) != (int) nelt)
41179 /* Validate that all of the elements are constants, and not totally
41180 out of range. Copy the data into an integral array to make the
41181 subsequent checks easier. */
41182 for (i = 0; i < nelt; ++i)
41184 rtx er = XVECEXP (par, 0, i);
41185 unsigned HOST_WIDE_INT ei;
41187 if (!CONST_INT_P (er))
/* Indices select from the 2*nelt elements of both source operands.  */
41190 if (ei >= 2 * nelt)
41195 /* Validate that the halves of the permute are halves. */
41196 for (i = 0; i < nelt2 - 1; ++i)
41197 if (ipar[i] + 1 != ipar[i + 1])
41199 for (i = nelt2; i < nelt - 1; ++i)
41200 if (ipar[i] + 1 != ipar[i + 1])
41203 /* Reconstruct the mask. */
/* Each 128-bit half contributes a lane selector in one nibble of the
   immediate.  */
41204 for (i = 0; i < 2; ++i)
41206 unsigned e = ipar[i * nelt2];
41210 mask |= e << (i * 4);
41213 /* Make sure success has a non-zero value by adding one. */
41217 /* Return a register priority for hard reg REGNO. */
41219 ix86_register_priority (int hard_regno)
/* Return an allocation priority for HARD_REGNO: lower for registers
   whose use costs extra encoding bytes, higher for AX.
   NOTE(review): the actual priority return values appear to be elided
   from this excerpt.  */
41221 /* ebp and r13 as the base always wants a displacement, r12 as the
41222 base always wants an index. So discourage their usage in an
41224 if (hard_regno == R12_REG || hard_regno == R13_REG)
41226 if (hard_regno == BP_REG)
41228 /* New x86-64 int registers result in bigger code size. Discourage
/* r8-r15 and xmm8-xmm15 require a REX prefix.  */
41230 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
41232 /* New x86-64 SSE registers result in bigger code size. Discourage
41234 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
41236 /* Usage of AX register results in smaller code. Prefer it. */
41237 if (hard_regno == 0)
41242 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
41244 Put float CONST_DOUBLE in the constant pool instead of fp regs.
41245 QImode must go into class Q_REGS.
41246 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
41247 movdf to do mem-to-mem moves through integer regs. */
41250 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
/* Implement TARGET_PREFERRED_RELOAD_CLASS for x86: narrow REGCLASS to
   a subclass suitable for loading X (see the block comment above).
   NOTE(review): some return statements appear to be elided from this
   excerpt.  */
41252 machine_mode mode = GET_MODE (x);
41254 /* We're only allowed to return a subclass of CLASS. Many of the
41255 following checks fail for NO_REGS, so eliminate that early. */
41256 if (regclass == NO_REGS)
41259 /* All classes can load zeros. */
41260 if (x == CONST0_RTX (mode))
41263 /* Force constants into memory if we are loading a (nonzero) constant into
41264 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
41265 instructions to load from a constant. */
41267 && (MAYBE_MMX_CLASS_P (regclass)
41268 || MAYBE_SSE_CLASS_P (regclass)
41269 || MAYBE_MASK_CLASS_P (regclass))
41272 /* Prefer SSE regs only, if we can use them for math. */
41273 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
41274 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
41276 /* Floating-point constants need more complex checks. */
41277 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
41279 /* General regs can load everything. */
41280 if (reg_class_subset_p (regclass, GENERAL_REGS))
41283 /* Floats can load 0 and 1 plus some others. Note that we eliminated
41284 zero above. We only want to wind up preferring 80387 registers if
41285 we plan on doing computation with them. */
41287 && standard_80387_constant_p (x) > 0)
41289 /* Limit class to non-sse. */
41290 if (regclass == FLOAT_SSE_REGS)
41292 if (regclass == FP_TOP_SSE_REGS)
41294 if (regclass == FP_SECOND_SSE_REGS)
41295 return FP_SECOND_REG;
41296 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
41303 /* Generally when we see PLUS here, it's the function invariant
41304 (plus soft-fp const_int). Which can only be computed into general
41306 if (GET_CODE (x) == PLUS)
41307 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
41309 /* QImode constants are easy to load, but non-constant QImode data
41310 must go into Q_REGS. */
41311 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
41313 if (reg_class_subset_p (regclass, Q_REGS))
41315 if (reg_class_subset_p (Q_REGS, regclass))
41323 /* Discourage putting floating-point values in SSE registers unless
41324 SSE math is being used, and likewise for the 387 registers. */
/* NOTE(review): elided view; braces and return-type line missing.
   Implements TARGET_PREFERRED_OUTPUT_RELOAD_CLASS.  */
41326 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
41328 machine_mode mode = GET_MODE (x);
41330 /* Restrict the output reload class to the register bank that we are doing
41331 math on. If we would like not to return a subset of CLASS, reject this
41332 alternative: if reload cannot do this, it will still use its choice. */
/* NOTE(review): redundant -- mode was already initialized to GET_MODE (x)
   at its declaration above.  Harmless, but a candidate for removal in the
   full source.  */
41333 mode = GET_MODE (x);
41334 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
41335 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
41337 if (X87_FLOAT_MODE_P (mode))
41339 if (regclass == FP_TOP_SSE_REGS)
41341 else if (regclass == FP_SECOND_SSE_REGS)
41342 return FP_SECOND_REG;
41344 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* Implement TARGET_SECONDARY_RELOAD.  Decides whether a reload of X into a
   register of class RCLASS (direction given by IN_P) needs a scratch or a
   special reload pattern, communicated through SRI.
   NOTE(review): elided view -- return-type line, several conditions, braces
   and most return statements are missing between visible lines.  */
41351 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
41352 machine_mode mode, secondary_reload_info *sri)
41354 /* Double-word spills from general registers to non-offsettable memory
41355 references (zero-extended addresses) require special handling. */
41358 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
41359 && INTEGER_CLASS_P (rclass)
41360 && !offsettable_memref_p (x))
/* sri->icode selects the load or store variant of the noff reload.  */
41363 ? CODE_FOR_reload_noff_load
41364 : CODE_FOR_reload_noff_store);
41365 /* Add the cost of moving address to a temporary. */
41366 sri->extra_cost = 1;
41371 /* QImode spills from non-QI registers require
41372 intermediate register on 32bit targets. */
41374 && (MAYBE_MASK_CLASS_P (rclass)
41375 || (!TARGET_64BIT && !in_p
41376 && INTEGER_CLASS_P (rclass)
41377 && MAYBE_NON_Q_CLASS_P (rclass))))
/* regno is declared in an elided line above; resolve SUBREGs/pseudos to a
   hard register number where possible.  */
41386 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
41387 regno = true_regnum (x);
41389 /* Return Q_REGS if the operand is in memory. */
41394 /* This condition handles corner case where an expression involving
41395 pointers gets vectorized. We're trying to use the address of a
41396 stack slot as a vector initializer.
41398 (set (reg:V2DI 74 [ vect_cst_.2 ])
41399 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
41401 Eventually frame gets turned into sp+offset like this:
41403 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41404 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41405 (const_int 392 [0x188]))))
41407 That later gets turned into:
41409 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41410 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41411 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
41413 We'll have the following reload recorded:
41415 Reload 0: reload_in (DI) =
41416 (plus:DI (reg/f:DI 7 sp)
41417 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
41418 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41419 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
41420 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
41421 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41422 reload_reg_rtx: (reg:V2DI 22 xmm1)
41424 Which isn't going to work since SSE instructions can't handle scalar
41425 additions. Returning GENERAL_REGS forces the addition into integer
41426 register and reload can handle subsequent reloads without problems. */
41428 if (in_p && GET_CODE (x) == PLUS
41429 && SSE_CLASS_P (rclass)
41430 && SCALAR_INT_MODE_P (mode))
41431 return GENERAL_REGS;
41436 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
/* NOTE(review): only two case labels of this function's switch are visible
   in this elided view; the surrounding switch, other cases and returns are
   missing.  Code left byte-identical.  */
41439 ix86_class_likely_spilled_p (reg_class_t rclass)
41450 case SSE_FIRST_REG:
41452 case FP_SECOND_REG:
41463 /* If we are copying between general and FP registers, we need a memory
41464 location. The same is true for SSE and MMX registers.
41466 To optimize register_move_cost performance, allow inline variant.
41468 The macro can't work reliably when one of the CLASSES is class containing
41469 registers from multiple units (SSE, MMX, integer). We avoid this by never
41470 combining those units in single alternative in the machine description.
41471 Ensure that this constraint holds to avoid unexpected surprises.
41473 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
41474 enforce these sanity checks. */
/* NOTE(review): elided view -- the static return-type line, braces and the
   true/false return statements after each test are missing.  */
41477 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41478 machine_mode mode, int strict)
41480 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
/* Sanity check: mixed-unit classes are only tolerated under LRA or when
   not strict (see gcc_assert below).  */
41482 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
41483 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
41484 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
41485 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
41486 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
41487 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
41489 gcc_assert (!strict || lra_in_progress);
41493 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
41496 /* Between mask and general, we have moves no larger than word size. */
41497 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
41498 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
41501 /* ??? This is a lie. We do have moves between mmx/general, and for
41502 mmx/sse2. But by saying we need secondary memory we discourage the
41503 register allocator from using the mmx registers unless needed. */
41504 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
41507 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41509 /* SSE1 doesn't have any direct moves from other classes. */
41513 /* If the target says that inter-unit moves are more expensive
41514 than moving through memory, then don't generate them. */
41515 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
41516 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
41519 /* Between SSE and general, we have moves no larger than word size. */
41520 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper around inline_secondary_memory_needed; the inline
   variant exists so register_move_cost can call it cheaply (see the comment
   above that function).  Return-type line and braces are elided here.  */
41528 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41529 machine_mode mode, int strict)
41531 return inline_secondary_memory_needed (class1, class2, mode, strict);
41534 /* Implement the TARGET_CLASS_MAX_NREGS hook.
41536 On the 80386, this is the size of MODE in words,
41537 except in the FP regs, where a single reg is always enough. */
/* NOTE(review): elided view -- braces and the non-integer-class tail of the
   function (after the COMPLEX_MODE_P test) are missing.  */
41539 static unsigned char
41540 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
41542 if (MAYBE_INTEGER_CLASS_P (rclass))
/* XFmode/XCmode occupy 2/4 words on 64-bit, 3/6 words on 32-bit.  */
41544 if (mode == XFmode)
41545 return (TARGET_64BIT ? 2 : 3);
41546 else if (mode == XCmode)
41547 return (TARGET_64BIT ? 4 : 6);
/* General case: round the mode size up to whole words.  */
41549 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
41553 if (COMPLEX_MODE_P (mode))
41560 /* Return true if the registers in CLASS cannot represent the change from
41561 modes FROM to TO. */
/* NOTE(review): elided view -- return-type line, braces and the final
   returns are missing between visible lines.  */
41564 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
41565 enum reg_class regclass)
41570 /* x87 registers can't do subreg at all, as all values are reformatted
41571 to extended precision. */
41572 if (MAYBE_FLOAT_CLASS_P (regclass))
41575 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
41577 /* Vector registers do not support QI or HImode loads. If we don't
41578 disallow a change to these modes, reload will assume it's ok to
41579 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
41580 the vec_dupv4hi pattern. */
/* Reject mode changes to anything narrower than SImode (4 bytes).  */
41581 if (GET_MODE_SIZE (from) < 4)
41588 /* Return the cost of moving data of mode M between a
41589 register and memory. A value of 2 is the default; this cost is
41590 relative to those in `REGISTER_MOVE_COST'.
41592 This function is used extensively by register_move_cost that is used to
41593 build tables at startup. Make it inline in this case.
41594 When IN is 2, return maximum of in and out move cost.
41596 If moving between registers and memory is more expensive than
41597 between two registers, you should define this macro to express the
41600 Model also increased moving costs of QImode registers in non
/* NOTE(review): elided view -- return-type line, the IN parameter
   declaration, `index' computation, braces and the per-size case labels of
   the switches are missing.  Visible pattern: pick a load/store cost table
   by register class (x87 / SSE / MMX / integer) and by mode size.  */
41604 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
41608 if (FLOAT_CLASS_P (regclass))
41626 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
41627 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
41629 if (SSE_CLASS_P (regclass))
41632 switch (GET_MODE_SIZE (mode))
41647 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
41648 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
41650 if (MMX_CLASS_P (regclass))
41653 switch (GET_MODE_SIZE (mode))
41665 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
41666 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer registers: size-1 moves model partial-register effects.  */
41668 switch (GET_MODE_SIZE (mode))
41671 if (Q_CLASS_P (regclass) || TARGET_64BIT)
41674 return ix86_cost->int_store[0];
41675 if (TARGET_PARTIAL_REG_DEPENDENCY
41676 && optimize_function_for_speed_p (cfun))
41677 cost = ix86_cost->movzbl_load;
41679 cost = ix86_cost->int_load[0];
41681 return MAX (cost, ix86_cost->int_store[0]);
41687 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
41689 return ix86_cost->movzbl_load;
41691 return ix86_cost->int_store[0] + 4;
41696 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
41697 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
41699 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
41700 if (mode == TFmode)
41703 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
41705 cost = ix86_cost->int_load[2];
41707 cost = ix86_cost->int_store[2];
41708 return (cost * (((int) GET_MODE_SIZE (mode)
41709 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
/* Implement TARGET_MEMORY_MOVE_COST: thin wrapper that maps the bool IN
   (declared on an elided line) to 1/0 for inline_memory_move_cost.  */
41714 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
41717 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
41721 /* Return the cost of moving data from a register in class CLASS1 to
41722 one in class CLASS2.
41724 It is not required that the cost always equal 2 when FROM is the same as TO;
41725 on some machines it is expensive to move between registers if they are not
41726 general registers. */
/* NOTE(review): elided view -- return-type line, braces, the declaration of
   `cost' and several return statements are missing.  */
41729 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
41730 reg_class_t class2_i)
41732 enum reg_class class1 = (enum reg_class) class1_i;
41733 enum reg_class class2 = (enum reg_class) class2_i;
41735 /* In case we require secondary memory, compute cost of the store followed
41736 by load. In order to avoid bad register allocation choices, we need
41737 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
41739 if (inline_secondary_memory_needed (class1, class2, mode, 0))
/* IN == 2 asks for max(load, store) in each direction.  */
41743 cost += inline_memory_move_cost (mode, class1, 2);
41744 cost += inline_memory_move_cost (mode, class2, 2);
41746 /* In case of copying from general_purpose_register we may emit multiple
41747 stores followed by single load causing memory size mismatch stall.
41748 Count this as arbitrarily high cost of 20. */
41749 if (targetm.class_max_nregs (class1, mode)
41750 > targetm.class_max_nregs (class2, mode))
41753 /* In the case of FP/MMX moves, the registers actually overlap, and we
41754 have to switch modes in order to treat them differently. */
41755 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
41756 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
41762 /* Moves between SSE/MMX and integer unit are expensive. */
41763 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
41764 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41766 /* ??? By keeping returned value relatively high, we limit the number
41767 of moves between integer and MMX/SSE registers for all targets.
41768 Additionally, high value prevents problem with x86_modes_tieable_p(),
41769 where integer modes in MMX/SSE registers are not tieable
41770 because of missing QImode and HImode moves to, from or between
41771 MMX/SSE registers. */
41772 return MAX (8, ix86_cost->mmxsse_to_integer);
41774 if (MAYBE_FLOAT_CLASS_P (class1))
41775 return ix86_cost->fp_move;
41776 if (MAYBE_SSE_CLASS_P (class1))
41777 return ix86_cost->sse_move;
41778 if (MAYBE_MMX_CLASS_P (class1))
41779 return ix86_cost->mmx_move;
41783 /* Return TRUE if hard register REGNO can hold a value of machine-mode
/* NOTE(review): elided view -- "MODE." tail of the comment, return-type
   line, braces and several returns are missing between visible lines.  */
41787 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
41789 /* Flags and only flags can only hold CCmode values. */
41790 if (CC_REGNO_P (regno))
41791 return GET_MODE_CLASS (mode) == MODE_CC;
/* CC/RANDOM/PARTIAL_INT modes are rejected for every other register.  */
41792 if (GET_MODE_CLASS (mode) == MODE_CC
41793 || GET_MODE_CLASS (mode) == MODE_RANDOM
41794 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
41796 if (STACK_REGNO_P (regno))
41797 return VALID_FP_MODE_P (mode);
41798 if (MASK_REGNO_P (regno))
41799 return (VALID_MASK_REG_MODE (mode)
41800 || (TARGET_AVX512BW
41801 && VALID_MASK_AVX512BW_MODE (mode)));
41802 if (BND_REGNO_P (regno))
41803 return VALID_BND_REG_MODE (mode);
41804 if (SSE_REGNO_P (regno))
41806 /* We implement the move patterns for all vector modes into and
41807 out of SSE registers, even when no operation instructions
41810 /* For AVX-512 we allow, regardless of regno:
41812 - any of 512-bit wide vector mode
41813 - any scalar mode. */
/* The leading TARGET_AVX512F conjunct of this condition is elided.  */
41816 || VALID_AVX512F_REG_MODE (mode)
41817 || VALID_AVX512F_SCALAR_MODE (mode)))
41820 /* TODO check for QI/HI scalars. */
41821 /* AVX512VL allows sse regs16+ for 128/256 bit modes. */
41822 if (TARGET_AVX512VL
41825 || VALID_AVX256_REG_MODE (mode)
41826 || VALID_AVX512VL_128_REG_MODE (mode)))
41829 /* xmm16-xmm31 are only available for AVX-512. */
41830 if (EXT_REX_SSE_REGNO_P (regno))
41833 /* OImode and AVX modes are available only when AVX is enabled. */
41834 return ((TARGET_AVX
41835 && VALID_AVX256_REG_OR_OI_MODE (mode))
41836 || VALID_SSE_REG_MODE (mode)
41837 || VALID_SSE2_REG_MODE (mode)
41838 || VALID_MMX_REG_MODE (mode)
41839 || VALID_MMX_REG_MODE_3DNOW (mode));
41841 if (MMX_REGNO_P (regno))
41843 /* We implement the move patterns for 3DNOW modes even in MMX mode,
41844 so if the register is available at all, then we can move data of
41845 the given mode into or out of it. */
41846 return (VALID_MMX_REG_MODE (mode)
41847 || VALID_MMX_REG_MODE_3DNOW (mode));
41850 if (mode == QImode)
41852 /* Take care for QImode values - they can be in non-QI regs,
41853 but then they do cause partial register stalls. */
41854 if (ANY_QI_REGNO_P (regno))
41856 if (!TARGET_PARTIAL_REG_STALL)
41858 /* LRA checks if the hard register is OK for the given mode.
41859 QImode values can live in non-QI regs, so we allow all
41861 if (lra_in_progress)
41863 return !can_create_pseudo_p ();
41865 /* We handle both integer and floats in the general purpose registers. */
41866 else if (VALID_INT_MODE_P (mode))
41868 else if (VALID_FP_MODE_P (mode))
41870 else if (VALID_DFP_MODE_P (mode))
41872 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
41873 on to use that value in smaller contexts, this can easily force a
41874 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
41875 supporting DImode, allow it. */
41876 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
41882 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
41883 tieable integer mode. */
/* NOTE(review): the switch over MODE, its case labels and braces are elided;
   the two visible returns presumably belong to the byte-size and DImode-size
   cases respectively -- TODO confirm against the full source.  */
41886 ix86_tieable_integer_mode_p (machine_mode mode)
41895 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
41898 return TARGET_64BIT;
41905 /* Return true if MODE1 is accessible in a register that can hold MODE2
41906 without copying. That is, all register classes that can hold MODE2
41907 can also hold MODE1. */
/* NOTE(review): elided view -- return-type line, braces and the early/final
   returns are missing.  */
41910 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
41912 if (mode1 == mode2)
41915 if (ix86_tieable_integer_mode_p (mode1)
41916 && ix86_tieable_integer_mode_p (mode2))
41919 /* MODE2 being XFmode implies fp stack or general regs, which means we
41920 can tie any smaller floating point modes to it. Note that we do not
41921 tie this with TFmode. */
41922 if (mode2 == XFmode)
41923 return mode1 == SFmode || mode1 == DFmode;
41925 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
41926 that we can tie it with SFmode. */
41927 if (mode2 == DFmode)
41928 return mode1 == SFmode;
41930 /* If MODE2 is only appropriate for an SSE register, then tie with
41931 any other mode acceptable to SSE registers. */
/* 32-byte (AVX) and 16-byte (SSE) vector sizes are matched pairwise.  */
41932 if (GET_MODE_SIZE (mode2) == 32
41933 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41934 return (GET_MODE_SIZE (mode1) == 32
41935 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41936 if (GET_MODE_SIZE (mode2) == 16
41937 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
41938 return (GET_MODE_SIZE (mode1) == 16
41939 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
41941 /* If MODE2 is appropriate for an MMX register, then tie
41942 with any other mode acceptable to MMX registers. */
41943 if (GET_MODE_SIZE (mode2) == 8
41944 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
41945 return (GET_MODE_SIZE (mode1) == 8
41946 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
41951 /* Return the cost of moving between two registers of mode MODE. */
/* NOTE(review): elided view -- return-type line, braces, the MODE_CC/
   MODE_FLOAT case labels and the break statements are missing.  Each case
   widens `units' to the largest chunk the target can move at once.  */
41954 ix86_set_reg_reg_cost (machine_mode mode)
41956 unsigned int units = UNITS_PER_WORD;
41958 switch (GET_MODE_CLASS (mode))
41964 units = GET_MODE_SIZE (CCmode);
/* Scalar float: movable in one piece when the FPU/SSE supports the mode.  */
41968 if ((TARGET_SSE && mode == TFmode)
41969 || (TARGET_80387 && mode == XFmode)
41970 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
41971 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
41972 units = GET_MODE_SIZE (mode);
41975 case MODE_COMPLEX_FLOAT:
41976 if ((TARGET_SSE && mode == TCmode)
41977 || (TARGET_80387 && mode == XCmode)
41978 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
41979 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
41980 units = GET_MODE_SIZE (mode);
41983 case MODE_VECTOR_INT:
41984 case MODE_VECTOR_FLOAT:
41985 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41986 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41987 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41988 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41989 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
41990 units = GET_MODE_SIZE (mode);
41993 /* Return the cost of moving between two registers of mode MODE,
41994 assuming that the move will be in pieces of at most UNITS bytes. */
41995 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
41998 /* Compute a (partial) cost for rtx X. Return true if the complete
41999 cost has been computed, and false if subexpressions should be
42000 scanned. In either case, *TOTAL contains the cost result. */
/* NOTE(review): heavily elided view of the TARGET_RTX_COSTS hook -- the
   return-type line, the giant switch's case labels (SET, CONST_INT, SHIFT,
   MULT, DIV, PLUS, MINUS, NEG, AND, COMPARE, ABS, SQRT, UNSPEC, etc.),
   braces, breaks and many returns are missing between the visible lines.
   Only the visible lines are annotated; code left byte-identical.  */
42003 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
42007 enum rtx_code code = (enum rtx_code) code_i;
42008 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
42009 machine_mode mode = GET_MODE (x);
/* Size costs are used when optimizing for size rather than speed.  */
42010 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
/* Plain reg-to-reg (or zero) SET: cost comes from ix86_set_reg_reg_cost.  */
42015 if (register_operand (SET_DEST (x), VOIDmode)
42016 && reg_or_0_operand (SET_SRC (x), VOIDmode))
42018 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
/* Constant operands: wide 64-bit immediates and PIC symbols cost extra.  */
42027 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
42029 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
42031 else if (flag_pic && SYMBOLIC_CONST (x)
42033 && (GET_CODE (x) == LABEL_REF
42034 || (GET_CODE (x) == SYMBOL_REF
42035 && SYMBOL_REF_LOCAL_P (x)))))
42042 if (mode == VOIDmode)
/* Floating-point constants: cheap if loadable by fld1/fldz etc.  */
42047 switch (standard_80387_constant_p (x))
42052 default: /* Other constants */
42059 if (SSE_FLOAT_MODE_P (mode))
42062 switch (standard_sse_constant_p (x))
42066 case 1: /* 0: xor eliminates false dependency */
42069 default: /* -1: cmp contains false dependency */
42074 /* Fall back to (MEM (SYMBOL_REF)), since that's where
42075 it'll probably end up. Add a penalty for size. */
42076 *total = (COSTS_N_INSNS (1)
42077 + (flag_pic != 0 && !TARGET_64BIT)
42078 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
42082 /* The zero extensions is often completely free on x86_64, so make
42083 it as cheap as possible. */
42084 if (TARGET_64BIT && mode == DImode
42085 && GET_MODE (XEXP (x, 0)) == SImode)
42087 else if (TARGET_ZERO_EXTEND_WITH_AND)
42088 *total = cost->add;
42090 *total = cost->movzx;
42094 *total = cost->movsx;
/* Shift by constant in a narrow scalar mode.  */
42098 if (SCALAR_INT_MODE_P (mode)
42099 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
42100 && CONST_INT_P (XEXP (x, 1)))
42102 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42105 *total = cost->add;
/* Small left shifts can be done with lea when that is cheaper.  */
42108 if ((value == 2 || value == 3)
42109 && cost->lea <= cost->shift_const)
42111 *total = cost->lea;
42121 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42123 /* ??? Should be SSE vector operation cost. */
42124 /* At least for published AMD latencies, this really is the same
42125 as the latency for a simple fpu operation like fabs. */
42126 /* V*QImode is emulated with 1-11 insns. */
42127 if (mode == V16QImode || mode == V32QImode)
42130 if (TARGET_XOP && mode == V16QImode)
42132 /* For XOP we use vpshab, which requires a broadcast of the
42133 value to the variable shift insn. For constants this
42134 means a V16Q const in mem; even when we can perform the
42135 shift with one insn set the cost to prefer paddb. */
42136 if (CONSTANT_P (XEXP (x, 1)))
42138 *total = (cost->fabs
42139 + rtx_cost (XEXP (x, 0), code, 0, speed)
42140 + (speed ? 2 : COSTS_N_BYTES (16)));
42145 else if (TARGET_SSSE3)
42147 *total = cost->fabs * count;
42150 *total = cost->fabs;
/* Double-word scalar shifts.  */
42152 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42154 if (CONST_INT_P (XEXP (x, 1)))
42156 if (INTVAL (XEXP (x, 1)) > 32)
42157 *total = cost->shift_const + COSTS_N_INSNS (2);
42159 *total = cost->shift_const * 2;
42163 if (GET_CODE (XEXP (x, 1)) == AND)
42164 *total = cost->shift_var * 2;
42166 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
42171 if (CONST_INT_P (XEXP (x, 1)))
42172 *total = cost->shift_const;
42173 else if (GET_CODE (XEXP (x, 1)) == SUBREG
42174 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
42176 /* Return the cost after shift-and truncation. */
42177 *total = cost->shift_var;
42181 *total = cost->shift_var;
/* FMA case: cost the three operands; negation of op0/op2 is free.  */
42189 gcc_assert (FLOAT_MODE_P (mode));
42190 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
42192 /* ??? SSE scalar/vector cost should be used here. */
42193 /* ??? Bald assumption that fma has the same cost as fmul. */
42194 *total = cost->fmul;
42195 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
42197 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
42199 if (GET_CODE (sub) == NEG)
42200 sub = XEXP (sub, 0);
42201 *total += rtx_cost (sub, FMA, 0, speed);
42204 if (GET_CODE (sub) == NEG)
42205 sub = XEXP (sub, 0);
42206 *total += rtx_cost (sub, FMA, 2, speed);
/* MULT case: scalar float multiplies by unit, then vector emulation.  */
42211 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42213 /* ??? SSE scalar cost should be used here. */
42214 *total = cost->fmul;
42217 else if (X87_FLOAT_MODE_P (mode))
42219 *total = cost->fmul;
42222 else if (FLOAT_MODE_P (mode))
42224 /* ??? SSE vector cost should be used here. */
42225 *total = cost->fmul;
42228 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42230 /* V*QImode is emulated with 7-13 insns. */
42231 if (mode == V16QImode || mode == V32QImode)
42234 if (TARGET_XOP && mode == V16QImode)
42236 else if (TARGET_SSSE3)
42238 *total = cost->fmul * 2 + cost->fabs * extra;
42240 /* V*DImode is emulated with 5-8 insns. */
42241 else if (mode == V2DImode || mode == V4DImode)
42243 if (TARGET_XOP && mode == V2DImode)
42244 *total = cost->fmul * 2 + cost->fabs * 3;
42246 *total = cost->fmul * 3 + cost->fabs * 5;
42248 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
42249 insns, including two PMULUDQ. */
42250 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
42251 *total = cost->fmul * 2 + cost->fabs * 5;
42253 *total = cost->fmul;
/* Integer multiply: cost scales with the population count of a constant
   multiplier (nbits), plus widening-multiply recognition below.  */
42258 rtx op0 = XEXP (x, 0);
42259 rtx op1 = XEXP (x, 1);
42261 if (CONST_INT_P (XEXP (x, 1)))
42263 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42264 for (nbits = 0; value != 0; value &= value - 1)
42268 /* This is arbitrary. */
42271 /* Compute costs correctly for widening multiplication. */
42272 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
42273 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
42274 == GET_MODE_SIZE (mode))
42276 int is_mulwiden = 0;
42277 machine_mode inner_mode = GET_MODE (op0);
42279 if (GET_CODE (op0) == GET_CODE (op1))
42280 is_mulwiden = 1, op1 = XEXP (op1, 0);
42281 else if (CONST_INT_P (op1))
42283 if (GET_CODE (op0) == SIGN_EXTEND)
42284 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
42287 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
42291 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
42294 *total = (cost->mult_init[MODE_INDEX (mode)]
42295 + nbits * cost->mult_bit
42296 + rtx_cost (op0, outer_code, opno, speed)
42297 + rtx_cost (op1, outer_code, opno, speed));
/* Division/modulus.  */
42306 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42307 /* ??? SSE cost should be used here. */
42308 *total = cost->fdiv;
42309 else if (X87_FLOAT_MODE_P (mode))
42310 *total = cost->fdiv;
42311 else if (FLOAT_MODE_P (mode))
42312 /* ??? SSE vector cost should be used here. */
42313 *total = cost->fdiv;
42315 *total = cost->divide[MODE_INDEX (mode)];
/* PLUS: recognize lea-formable address shapes (base + index*scale + disp).  */
42319 if (GET_MODE_CLASS (mode) == MODE_INT
42320 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
42322 if (GET_CODE (XEXP (x, 0)) == PLUS
42323 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
42324 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
42325 && CONSTANT_P (XEXP (x, 1)))
42327 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
42328 if (val == 2 || val == 4 || val == 8)
42330 *total = cost->lea;
42331 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42332 outer_code, opno, speed);
42333 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
42334 outer_code, opno, speed);
42335 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42339 else if (GET_CODE (XEXP (x, 0)) == MULT
42340 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
42342 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
42343 if (val == 2 || val == 4 || val == 8)
42345 *total = cost->lea;
42346 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42347 outer_code, opno, speed);
42348 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42352 else if (GET_CODE (XEXP (x, 0)) == PLUS)
42354 *total = cost->lea;
42355 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42356 outer_code, opno, speed);
42357 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42358 outer_code, opno, speed);
42359 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
/* PLUS/MINUS on floats.  */
42366 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42368 /* ??? SSE cost should be used here. */
42369 *total = cost->fadd;
42372 else if (X87_FLOAT_MODE_P (mode))
42374 *total = cost->fadd;
42377 else if (FLOAT_MODE_P (mode))
42379 /* ??? SSE vector cost should be used here. */
42380 *total = cost->fadd;
/* Double-word integer add/sub: two adds, plus operand conversion cost.  */
42388 if (GET_MODE_CLASS (mode) == MODE_INT
42389 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42391 *total = (cost->add * 2
42392 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
42393 << (GET_MODE (XEXP (x, 0)) != DImode))
42394 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
42395 << (GET_MODE (XEXP (x, 1)) != DImode)));
/* NEG.  */
42401 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42403 /* ??? SSE cost should be used here. */
42404 *total = cost->fchs;
42407 else if (X87_FLOAT_MODE_P (mode))
42409 *total = cost->fchs;
42412 else if (FLOAT_MODE_P (mode))
42414 /* ??? SSE vector cost should be used here. */
42415 *total = cost->fchs;
/* Logical ops (AND/IOR/XOR/NOT fall through here).  */
42421 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42423 /* ??? Should be SSE vector operation cost. */
42424 /* At least for published AMD latencies, this really is the same
42425 as the latency for a simple fpu operation like fabs. */
42426 *total = cost->fabs;
42428 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42429 *total = cost->add * 2;
42431 *total = cost->add;
/* COMPARE of a single extracted bit against zero -> test[bwl].  */
42435 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
42436 && XEXP (XEXP (x, 0), 1) == const1_rtx
42437 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
42438 && XEXP (x, 1) == const0_rtx)
42440 /* This kind of construct is implemented using test[bwl].
42441 Treat it as if we had an AND. */
42442 *total = (cost->add
42443 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
42444 + rtx_cost (const1_rtx, outer_code, opno, speed));
42450 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
/* ABS.  */
42455 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42456 /* ??? SSE cost should be used here. */
42457 *total = cost->fabs;
42458 else if (X87_FLOAT_MODE_P (mode))
42459 *total = cost->fabs;
42460 else if (FLOAT_MODE_P (mode))
42461 /* ??? SSE vector cost should be used here. */
42462 *total = cost->fabs;
/* SQRT.  */
42466 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42467 /* ??? SSE cost should be used here. */
42468 *total = cost->fsqrt;
42469 else if (X87_FLOAT_MODE_P (mode))
42470 *total = cost->fsqrt;
42471 else if (FLOAT_MODE_P (mode))
42472 /* ??? SSE vector cost should be used here. */
42473 *total = cost->fsqrt;
/* UNSPEC: thread-pointer access is essentially free.  */
42477 if (XINT (x, 1) == UNSPEC_TP)
42483 case VEC_DUPLICATE:
42484 /* ??? Assume all of these vector manipulation patterns are
42485 recognizable. In which case they all pretty much have the
42487 *total = cost->fabs;
/* Masked (AVX-512) operation: cost as the unmasked variant.  */
42490 mask = XEXP (x, 2);
42491 /* This is masked instruction, assume the same cost,
42492 as nonmasked variant. */
42493 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
42494 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
42496 *total = cost->fabs;
/* Counter used to generate unique Mach-O lazy-pointer labels (L<N>$lz).  */
42506 static int current_machopic_label_num;
42508 /* Given a symbol name and its associated stub, write out the
42509 definition of the stub. */
/* NOTE(review): elided view (Darwin/TARGET_MACHO only) -- the return-type
   line, the surrounding #if TARGET_MACHO, braces and some else branches are
   missing.  Emits a 32-bit Mach-O symbol stub plus its lazy pointer.  */
42512 machopic_output_stub (FILE *file, const char *symb, const char *stub)
42514 unsigned int length;
42515 char *binder_name, *symbol_name, lazy_ptr_name[32];
42516 int label = ++current_machopic_label_num;
42518 /* For 64-bit we shouldn't get here. */
42519 gcc_assert (!TARGET_64BIT);
42521 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
42522 symb = targetm.strip_name_encoding (symb);
42524 length = strlen (stub);
42525 binder_name = XALLOCAVEC (char, length + 32);
42526 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
42528 length = strlen (symb);
42529 symbol_name = XALLOCAVEC (char, length + 32);
42530 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
42532 sprintf (lazy_ptr_name, "L%d$lz", label);
/* Pick the stub section variant: ATT-style, PIC, or non-PIC.  */
42534 if (MACHOPIC_ATT_STUB)
42535 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
42536 else if (MACHOPIC_PURE)
42537 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
42539 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
42541 fprintf (file, "%s:\n", stub);
42542 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42544 if (MACHOPIC_ATT_STUB)
42546 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
42548 else if (MACHOPIC_PURE)
42551 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42552 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
42553 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
42554 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
42555 label, lazy_ptr_name, label);
42556 fprintf (file, "\tjmp\t*%%ecx\n");
42559 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
42561 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
42562 it needs no stub-binding-helper. */
42563 if (MACHOPIC_ATT_STUB)
42566 fprintf (file, "%s:\n", binder_name);
42570 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
42571 fprintf (file, "\tpushl\t%%ecx\n");
42574 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
42576 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
42578 /* N.B. Keep the correspondence of these
42579 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
42580 old-pic/new-pic/non-pic stubs; altering this will break
42581 compatibility with existing dylibs. */
42584 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42585 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
42588 /* 16-byte -mdynamic-no-pic stub. */
42589 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
42591 fprintf (file, "%s:\n", lazy_ptr_name);
42592 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42593 fprintf (file, ASM_LONG "%s\n", binder_name);
42595 #endif /* TARGET_MACHO */
42597 /* Order the registers for register allocator. */
/* Fills the global reg_alloc_order[] array with hard register numbers
   in preference order for local register allocation: call-clobbered
   GPRs first, then call-saved GPRs, then FP/SSE/mask/bound registers.
   NOTE(review): excerpt is elided — the function head and loop braces
   are missing; verify against the full file.  */
42600 x86_order_regs_for_local_alloc (void)
42605 /* First allocate the local general purpose registers. */
42606 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42607 if (GENERAL_REGNO_P (i) && call_used_regs[i])
42608 reg_alloc_order [pos++] = i;
42610 /* Global general purpose registers. */
42611 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42612 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
42613 reg_alloc_order [pos++] = i;
42615 /* x87 registers come first in case we are doing FP math
/* When not using SSE for FP math, prefer the x87 stack registers
   ahead of the SSE registers.  */
42617 if (!TARGET_SSE_MATH)
42618 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42619 reg_alloc_order [pos++] = i;
42621 /* SSE registers. */
42622 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
42623 reg_alloc_order [pos++] = i;
42624 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
42625 reg_alloc_order [pos++] = i;
42627 /* Extended REX SSE registers. */
42628 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
42629 reg_alloc_order [pos++] = i;
42631 /* Mask register. */
42632 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
42633 reg_alloc_order [pos++] = i;
42635 /* MPX bound registers. */
42636 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
42637 reg_alloc_order [pos++] = i;
42639 /* x87 registers. */
/* With SSE math the x87 stack registers are the last resort.  */
42640 if (TARGET_SSE_MATH)
42641 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42642 reg_alloc_order [pos++] = i;
42644 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
42645 reg_alloc_order [pos++] = i;
42647 /* Initialize the rest of array as we do not allocate some registers
/* Pad the remainder of the order array with register 0.  */
42649 while (pos < FIRST_PSEUDO_REGISTER)
42650 reg_alloc_order [pos++] = 0;
42653 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
42654 in struct attribute_spec handler. */
/* Validates that the attribute is applied to a function type and that
   its single argument is the integer constant 0 or 1; otherwise warns
   and sets *no_add_attrs.  NOTE(review): excerpt is elided — the
   32-bit-only check's condition line is missing here.  */
42656 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
42659 bool *no_add_attrs)
42661 if (TREE_CODE (*node) != FUNCTION_TYPE
42662 && TREE_CODE (*node) != METHOD_TYPE
42663 && TREE_CODE (*node) != FIELD_DECL
42664 && TREE_CODE (*node) != TYPE_DECL)
42666 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42668 *no_add_attrs = true;
/* Attribute is only meaningful for the 32-bit ABI.  */
42673 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
42675 *no_add_attrs = true;
42678 if (is_attribute_p ("callee_pop_aggregate_return", name))
42682 cst = TREE_VALUE (args);
42683 if (TREE_CODE (cst) != INTEGER_CST)
42685 warning (OPT_Wattributes,
42686 "%qE attribute requires an integer constant argument",
42688 *no_add_attrs = true;
/* The argument must be exactly 0 or 1.  */
42690 else if (compare_tree_int (cst, 0) != 0
42691 && compare_tree_int (cst, 1) != 0)
42693 warning (OPT_Wattributes,
42694 "argument to %qE attribute is neither zero, nor one",
42696 *no_add_attrs = true;
42705 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
42706 struct attribute_spec.handler. */
/* Rejects the attribute on non-function entities and errors if the
   mutually-exclusive counterpart (sysv_abi vs. ms_abi) is already
   present on the type.  */
42708 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
42709 bool *no_add_attrs)
42711 if (TREE_CODE (*node) != FUNCTION_TYPE
42712 && TREE_CODE (*node) != METHOD_TYPE
42713 && TREE_CODE (*node) != FIELD_DECL
42714 && TREE_CODE (*node) != TYPE_DECL)
42716 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42718 *no_add_attrs = true;
42722 /* Can combine regparm with all attributes but fastcall. */
42723 if (is_attribute_p ("ms_abi", name))
42725 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
42727 error ("ms_abi and sysv_abi attributes are not compatible");
42732 else if (is_attribute_p ("sysv_abi", name))
42734 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
42736 error ("ms_abi and sysv_abi attributes are not compatible");
42745 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
42746 struct attribute_spec.handler. */
/* Accepts the attribute only on record/union types (directly or via a
   TYPE_DECL), and warns when ms_struct and gcc_struct are combined on
   the same type.  */
42748 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
42749 bool *no_add_attrs)
42752 if (DECL_P (*node))
/* For a TYPE_DECL, validate the declared type rather than the decl.  */
42754 if (TREE_CODE (*node) == TYPE_DECL)
42755 type = &TREE_TYPE (*node);
42760 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
42762 warning (OPT_Wattributes, "%qE attribute ignored",
42764 *no_add_attrs = true;
42767 else if ((is_attribute_p ("ms_struct", name)
42768 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
42769 || ((is_attribute_p ("gcc_struct", name)
42770 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
42772 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
42774 *no_add_attrs = true;
/* Attribute handler: warn and drop the attribute when it is applied
   to anything other than a FUNCTION_DECL.  */
42781 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
42782 bool *no_add_attrs)
42784 if (TREE_CODE (*node) != FUNCTION_DECL)
42786 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42788 *no_add_attrs = true;
/* Return true if RECORD_TYPE should use the MS bit-field layout:
   either the target default requests it (and the type is not marked
   gcc_struct), or the type carries an explicit ms_struct attribute.  */
42794 ix86_ms_bitfield_layout_p (const_tree record_type)
42796 return ((TARGET_MS_BITFIELD_LAYOUT
42797 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
42798 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
42801 /* Returns an expression indicating where the this parameter is
42802 located on entry to the FUNCTION. */
/* 64-bit: `this' arrives in the first integer parameter register of
   the function's ABI (shifted by one when the return value is an
   aggregate returned via hidden pointer).  32-bit: it may be in a
   register for regparm/fastcall/thiscall, otherwise on the stack.
   NOTE(review): excerpt is elided — several branch/return lines are
   missing; confirm against the full file.  */
42805 x86_this_parameter (tree function)
42807 tree type = TREE_TYPE (function);
/* aggr != 0 means the return value is passed via a hidden pointer,
   which occupies the first parameter slot.  */
42808 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
42813 const int *parm_regs;
42815 if (ix86_function_type_abi (type) == MS_ABI)
42816 parm_regs = x86_64_ms_abi_int_parameter_registers;
42818 parm_regs = x86_64_int_parameter_registers;
42819 return gen_rtx_REG (Pmode, parm_regs[aggr]);
42822 nregs = ix86_function_regparm (type, function);
42824 if (nregs > 0 && !stdarg_p (type))
42827 unsigned int ccvt = ix86_get_callcvt (type);
42829 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42830 regno = aggr ? DX_REG : CX_REG;
42831 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
/* thiscall with aggregate return: `this' is at 4(%esp).  */
42835 return gen_rtx_MEM (SImode,
42836 plus_constant (Pmode, stack_pointer_rtx, 4));
42845 return gen_rtx_MEM (SImode,
42846 plus_constant (Pmode,
42847 stack_pointer_rtx, 4));
42850 return gen_rtx_REG (SImode, regno);
/* Default 32-bit case: `this' lives on the stack.  */
42853 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
42857 /* Determine whether x86_output_mi_thunk can succeed. */
/* Returns whether the thunk emitter has a scratch register available:
   64-bit always succeeds; 32-bit needs a free register when all three
   regparm slots are taken and either a vcall offset or a PIC GOT
   reference is required.  */
42860 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
42861 const_tree function)
42863 /* 64-bit can handle anything. */
42867 /* For 32-bit, everything's fine if we have one free register. */
42868 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
42871 /* Need a free register for vcall_offset. */
42875 /* Need a free register for GOT references. */
42876 if (flag_pic && !targetm.binds_local_p (function))
42879 /* Otherwise ok. */
42883 /* Output the assembler code for a thunk function. THUNK_DECL is the
42884 declaration for the thunk function itself, FUNCTION is the decl for
42885 the target function. DELTA is an immediate constant offset to be
42886 added to THIS. If VCALL_OFFSET is nonzero, the word at
42887 *(*this + vcall_offset) should be added to THIS. */
/* Implementation of TARGET_ASM_OUTPUT_MI_THUNK: adjust `this' by
   DELTA (and optionally by a vtable-stored VCALL_OFFSET), then
   sibcall FUNCTION.  NOTE(review): excerpt is elided — many guard
   lines (#if, else, braces) are missing; verify control flow against
   the full file before editing.  */
42890 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
42891 HOST_WIDE_INT vcall_offset, tree function)
42893 rtx this_param = x86_this_parameter (function);
42894 rtx this_reg, tmp, fnaddr;
42895 unsigned int tmp_regno;
/* Pick a scratch register that cannot hold an argument: %r10 on
   64-bit; on 32-bit it depends on the calling convention.  */
42899 tmp_regno = R10_REG;
42902 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
42903 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
42904 tmp_regno = AX_REG;
42905 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
42906 tmp_regno = DX_REG;
42908 tmp_regno = CX_REG;
42911 emit_note (NOTE_INSN_PROLOGUE_END);
42913 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
42914 pull it in now and let DELTA benefit. */
42915 if (REG_P (this_param))
42916 this_reg = this_param;
42917 else if (vcall_offset)
42919 /* Put the this parameter into %eax. */
42920 this_reg = gen_rtx_REG (Pmode, AX_REG);
42921 emit_move_insn (this_reg, this_param);
42924 this_reg = NULL_RTX;
42926 /* Adjust the this parameter by a fixed constant. */
42929 rtx delta_rtx = GEN_INT (delta);
42930 rtx delta_dst = this_reg ? this_reg : this_param;
/* Large deltas don't fit a 32-bit immediate on x86-64; materialize
   them in the scratch register first.  */
42934 if (!x86_64_general_operand (delta_rtx, Pmode))
42936 tmp = gen_rtx_REG (Pmode, tmp_regno);
42937 emit_move_insn (tmp, delta_rtx);
42942 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
42945 /* Adjust the this parameter by a value stored in the vtable. */
42948 rtx vcall_addr, vcall_mem, this_mem;
42950 tmp = gen_rtx_REG (Pmode, tmp_regno);
/* Load the vtable pointer (*this); zero-extend when pointers are
   narrower than Pmode (x32).  */
42952 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
42953 if (Pmode != ptr_mode)
42954 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
42955 emit_move_insn (tmp, this_mem);
42957 /* Adjust the this parameter. */
42958 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
42960 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
42962 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
42963 emit_move_insn (tmp2, GEN_INT (vcall_offset));
42964 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
42967 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
42968 if (Pmode != ptr_mode)
42969 emit_insn (gen_addsi_1_zext (this_reg,
42970 gen_rtx_REG (ptr_mode,
42974 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
42977 /* If necessary, drop THIS back to its stack slot. */
42978 if (this_reg && this_reg != this_param)
42979 emit_move_insn (this_param, this_reg);
42981 fnaddr = XEXP (DECL_RTL (function), 0);
/* Compute the call target address; non-local functions under PIC go
   through the GOT (64-bit) or GOT+set_got (32-bit).  */
42984 if (!flag_pic || targetm.binds_local_p (function)
42989 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
42990 tmp = gen_rtx_CONST (Pmode, tmp);
42991 fnaddr = gen_const_mem (Pmode, tmp);
42996 if (!flag_pic || targetm.binds_local_p (function))
42999 else if (TARGET_MACHO)
43001 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
43002 fnaddr = XEXP (fnaddr, 0);
43004 #endif /* TARGET_MACHO */
43007 tmp = gen_rtx_REG (Pmode, CX_REG);
43008 output_set_got (tmp, NULL_RTX);
43010 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
43011 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
43012 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
43013 fnaddr = gen_const_mem (Pmode, fnaddr);
43017 /* Our sibling call patterns do not allow memories, because we have no
43018 predicate that can distinguish between frame and non-frame memory.
43019 For our purposes here, we can get away with (ab)using a jump pattern,
43020 because we're going to do no optimization. */
43021 if (MEM_P (fnaddr))
43023 if (sibcall_insn_operand (fnaddr, word_mode))
43025 fnaddr = XEXP (DECL_RTL (function), 0);
43026 tmp = gen_rtx_MEM (QImode, fnaddr);
43027 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
43028 tmp = emit_call_insn (tmp);
43029 SIBLING_CALL_P (tmp) = 1;
43032 emit_jump_insn (gen_indirect_jump (fnaddr));
43036 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
43038 // CM_LARGE_PIC always uses pseudo PIC register which is
43039 // uninitialized. Since FUNCTION is local and calling it
43040 // doesn't go through PLT, we use scratch register %r11 as
43041 // PIC register and initialize it here.
43042 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
43043 ix86_init_large_pic_reg (tmp_regno);
43044 fnaddr = legitimize_pic_address (fnaddr,
43045 gen_rtx_REG (Pmode, tmp_regno));
/* If the address still isn't a valid sibcall operand, force it into
   the scratch register.  */
43048 if (!sibcall_insn_operand (fnaddr, word_mode))
43050 tmp = gen_rtx_REG (word_mode, tmp_regno);
43051 if (GET_MODE (fnaddr) != word_mode)
43052 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
43053 emit_move_insn (tmp, fnaddr);
43057 tmp = gen_rtx_MEM (QImode, fnaddr);
43058 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
43059 tmp = emit_call_insn (tmp);
43060 SIBLING_CALL_P (tmp) = 1;
43064 /* Emit just enough of rest_of_compilation to get the insns emitted.
43065 Note that use_thunk calls assemble_start_function et al. */
43066 insn = get_insns ();
43067 shorten_branches (insn);
43068 final_start_function (insn, file, 1);
43069 final (insn, file, 1);
43070 final_end_function ();
/* Implementation of TARGET_ASM_FILE_START: emit the standard file
   prologue plus target-specific directives (.code16gcc for -m16,
   Darwin header, .version, __fltused, Intel-syntax mode).  */
43074 x86_file_start (void)
43076 default_file_start ();
43078 fputs ("\t.code16gcc\n", asm_out_file);
43080 darwin_file_start ();
43082 if (X86_FILE_START_VERSION_DIRECTIVE)
43083 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
43084 if (X86_FILE_START_FLTUSED)
43085 fputs ("\t.global\t__fltused\n", asm_out_file);
43086 if (ix86_asm_dialect == ASM_INTEL)
43087 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* Compute the alignment for FIELD given the alignment COMPUTED by the
   generic code; on 32-bit without -malign-double, cap integer,
   complex-int, DFmode and DCmode fields at 32 bits (i386 psABI).  */
43091 x86_field_alignment (tree field, int computed)
43094 tree type = TREE_TYPE (field);
/* 64-bit and -malign-double keep the generic alignment.  */
43096 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
43098 mode = TYPE_MODE (strip_array_types (type));
43099 if (mode == DFmode || mode == DCmode
43100 || GET_MODE_CLASS (mode) == MODE_INT
43101 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
43102 return MIN (32, computed);
43106 /* Print call to TARGET to FILE. */
/* With -mnop-mcount, emit a 5-byte NOP of the same length as the call
   so the mcount slot can be patched at run time.  */
43109 x86_print_call_or_nop (FILE *file, const char *target)
43111 if (flag_nop_mcount)
43112 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
43114 fprintf (file, "1:\tcall\t%s\n", target);
43117 /* Output assembler code to FILE to increment profiler label # LABELNO
43118 for profiling a function entry. */
/* Emits the mcount call for -pg: 64-bit PIC goes through @GOTPCREL,
   32-bit PIC through @GOT(%ebx), non-PIC calls directly; optionally
   records the call site in __mcount_loc for -mrecord-mcount.
   NOTE(review): excerpt is elided — the 64-bit/32-bit/PIC branch
   structure lines are missing here.  */
43120 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
43122 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
43126 #ifndef NO_PROFILE_COUNTERS
43127 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
43130 if (!TARGET_PECOFF && flag_pic)
43131 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
43133 x86_print_call_or_nop (file, mcount_name);
43137 #ifndef NO_PROFILE_COUNTERS
43138 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
43141 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
43145 #ifndef NO_PROFILE_COUNTERS
43146 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
43149 x86_print_call_or_nop (file, mcount_name);
/* -mrecord-mcount: remember the patchable call site ("1b" label) in
   the __mcount_loc section.  */
43152 if (flag_record_mcount)
43154 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
43155 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
43156 fprintf (file, "\t.previous\n");
43160 /* We don't have exact information about the insn sizes, but we may assume
43161 quite safely that we are informed about all 1 byte insns and memory
43162 address sizes. This is enough to eliminate unnecessary padding in
/* Returns a conservative lower bound (in bytes) on INSN's encoded
   length, used by the jump-mispredict padding pass below.
   NOTE(review): excerpt is elided — several return statements are
   missing from this listing.  */
43166 min_insn_size (rtx_insn *insn)
43170 if (!INSN_P (insn) || !active_insn_p (insn))
43173 /* Discard alignments we've emit and jump instructions. */
43174 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
43175 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
43178 /* Important case - calls are always 5 bytes.
43179 It is common to have many calls in the row. */
43181 && symbolic_reference_mentioned_p (PATTERN (insn))
43182 && !SIBLING_CALL_P (insn))
43184 len = get_attr_length (insn);
43188 /* For normal instructions we rely on get_attr_length being exact,
43189 with a few exceptions. */
43190 if (!JUMP_P (insn))
43192 enum attr_type type = get_attr_type (insn);
/* Inline asm length is only an estimate, not a lower bound.  */
43197 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
43198 || asm_noperands (PATTERN (insn)) >= 0)
43205 /* Otherwise trust get_attr_length. */
43209 l = get_attr_length_address (insn);
43210 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
43219 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43221 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Scans the insn stream with a sliding window [START, INSN] and emits
   padding before the 4th jump whenever four jumps could land in the
   same 16-byte window.  Uses min_insn_size for conservative sizing.
   NOTE(review): excerpt is elided — loop braces and some conditions
   are missing from this listing.  */
43225 ix86_avoid_jump_mispredicts (void)
43227 rtx_insn *insn, *start = get_insns ();
43228 int nbytes = 0, njumps = 0;
43229 bool isjump = false;
43231 /* Look for all minimal intervals of instructions containing 4 jumps.
43232 The intervals are bounded by START and INSN. NBYTES is the total
43233 size of instructions in the interval including INSN and not including
43234 START. When the NBYTES is smaller than 16 bytes, it is possible
43235 that the end of START and INSN ends up in the same 16byte page.
43237 The smallest offset in the page INSN can start is the case where START
43238 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
43239 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
43241 Don't consider asm goto as jump, while it can contain a jump, it doesn't
43242 have to, control transfer to label(s) can be performed through other
43243 means, and also we estimate minimum length of all asm stmts as 0. */
43244 for (insn = start; insn; insn = NEXT_INSN (insn))
43248 if (LABEL_P (insn))
43250 int align = label_to_alignment (insn);
43251 int max_skip = label_to_max_skip (insn);
43255 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
43256 already in the current 16 byte page, because otherwise
43257 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
43258 bytes to reach 16 byte boundary. */
43260 || (align <= 3 && max_skip != (1 << align) - 1))
43263 fprintf (dump_file, "Label %i with max_skip %i\n",
43264 INSN_UID (insn), max_skip);
/* Shrink the window from the left until it can fit in a page
   even after the label's alignment skip.  */
43267 while (nbytes + max_skip >= 16)
43269 start = NEXT_INSN (start);
43270 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43272 njumps--, isjump = true;
43275 nbytes -= min_insn_size (start);
43281 min_size = min_insn_size (insn);
43282 nbytes += min_size;
43284 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
43285 INSN_UID (insn), min_size);
43286 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
/* Keep at most 4 jumps in the window; drop insns from the left.  */
43294 start = NEXT_INSN (start);
43295 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43297 njumps--, isjump = true;
43300 nbytes -= min_insn_size (start);
43302 gcc_assert (njumps >= 0);
43304 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
43305 INSN_UID (start), INSN_UID (insn), nbytes);
43307 if (njumps == 3 && isjump && nbytes < 16)
43309 int padsize = 15 - nbytes + min_insn_size (insn);
43312 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
43313 INSN_UID (insn), padsize);
43314 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
43320 /* AMD Athlon works faster
43321 when RET is not destination of conditional jump or directly preceded
43322 by other jump instruction. We avoid the penalty by inserting NOP just
43323 before the RET instructions in such cases. */
/* Walks every predecessor of the exit block and, when a return insn
   is reached directly by a jump (or a non-fallthru edge), replaces it
   with the "return with prefix" pattern to avoid the mispredict.
   NOTE(review): excerpt is elided — some branch lines are missing.  */
43325 ix86_pad_returns (void)
43330 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43332 basic_block bb = e->src;
43333 rtx_insn *ret = BB_END (bb);
43335 bool replace = false;
43337 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
43338 || optimize_bb_for_size_p (bb))
/* Find the nearest preceding active insn or label.  */
43340 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
43341 if (active_insn_p (prev) || LABEL_P (prev))
43343 if (prev && LABEL_P (prev))
/* RET preceded by a label: pad unless every incoming edge falls
   through.  */
43348 FOR_EACH_EDGE (e, ei, bb->preds)
43349 if (EDGE_FREQUENCY (e) && e->src->index >= 0
43350 && !(e->flags & EDGE_FALLTHRU))
43358 prev = prev_active_insn (ret);
43360 && ((JUMP_P (prev) && any_condjump_p (prev))
43363 /* Empty functions get branch mispredict even when
43364 the jump destination is not visible to us. */
43365 if (!prev && !optimize_function_for_size_p (cfun))
43370 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
43376 /* Count the minimum number of instructions in BB. Return 4 if the
43377 number of instructions >= 4. */
/* Saturating count: returns early with 4 once four real (non-USE,
   non-CLOBBER, non-debug) insns are seen; returns are not counted.  */
43380 ix86_count_insn_bb (basic_block bb)
43383 int insn_count = 0;
43385 /* Count number of instructions in this block. Return 4 if the number
43386 of instructions >= 4. */
43387 FOR_BB_INSNS (bb, insn)
43389 /* Only happen in exit blocks. */
43391 && ANY_RETURN_P (PATTERN (insn)))
43394 if (NONDEBUG_INSN_P (insn)
43395 && GET_CODE (PATTERN (insn)) != USE
43396 && GET_CODE (PATTERN (insn)) != CLOBBER)
43399 if (insn_count >= 4)
43408 /* Count the minimum number of instructions in code path in BB.
43409 Return 4 if the number of instructions >= 4. */
/* Computes the minimum insn count over entry->pred->BB paths of at
   most two blocks; paths longer than that are treated as >= 4.  */
43412 ix86_count_insn (basic_block bb)
43416 int min_prev_count;
43418 /* Only bother counting instructions along paths with no
43419 more than 2 basic blocks between entry and exit. Given
43420 that BB has an edge to exit, determine if a predecessor
43421 of BB has an edge from entry. If so, compute the number
43422 of instructions in the predecessor block. If there
43423 happen to be multiple such blocks, compute the minimum. */
43424 min_prev_count = 4;
43425 FOR_EACH_EDGE (e, ei, bb->preds)
43428 edge_iterator prev_ei;
/* BB is directly reachable from entry: nothing precedes it.  */
43430 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43432 min_prev_count = 0;
43435 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
43437 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43439 int count = ix86_count_insn_bb (e->src);
43440 if (count < min_prev_count)
43441 min_prev_count = count;
/* Only add BB's own insns when the shortest prefix is still < 4.  */
43447 if (min_prev_count < 4)
43448 min_prev_count += ix86_count_insn_bb (bb);
43450 return min_prev_count;
43453 /* Pad short function to 4 instructions. */
/* For -mpad-short-function targets: insert NOPs before the epilogue
   of any return path whose minimum path length is under 4 insns.  */
43456 ix86_pad_short_function (void)
43461 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43463 rtx_insn *ret = BB_END (e->src);
43464 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
43466 int insn_count = ix86_count_insn (e->src);
43468 /* Pad short function. */
43469 if (insn_count < 4)
43471 rtx_insn *insn = ret;
43473 /* Find epilogue. */
/* Walk back to the NOTE_INSN_EPILOGUE_BEG so the padding lands
   before the epilogue, not between epilogue and return.  */
43476 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
43477 insn = PREV_INSN (insn);
43482 /* Two NOPs count as one instruction. */
43483 insn_count = 2 * (4 - insn_count);
43484 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
43490 /* Fix up a Windows system unwinder issue. If an EH region falls through into
43491 the epilogue, the Windows system unwinder will apply epilogue logic and
43492 produce incorrect offsets. This can be avoided by adding a nop between
43493 the last insn that can throw and the first insn of the epilogue. */
43496 ix86_seh_fixup_eh_fallthru (void)
43501 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43503 rtx_insn *insn, *next;
43505 /* Find the beginning of the epilogue. */
43506 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
43507 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
43512 /* We only care about preceding insns that can throw. */
43513 insn = prev_active_insn (insn);
43514 if (insn == NULL || !can_throw_internal (insn))
43517 /* Do not separate calls from their debug information. */
/* Skip past VAR_LOCATION / CALL_ARG_LOCATION notes so the NOP goes
   after the call's debug notes, not between call and notes.  */
43518 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
43520 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
43521 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
43526 emit_insn_after (gen_nops (const1_rtx), insn);
43530 /* Implement machine specific optimizations. We implement padding of returns
43531 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* NOTE(review): the function signature line is missing from this
   excerpt — presumably this is ix86_reorg (TARGET_MACHINE_DEPENDENT_REORG);
   confirm against the full file.  */
43535 /* We are freeing block_for_insn in the toplev to keep compatibility
43536 with old MDEP_REORGS that are not CFG based. Recompute it now. */
43537 compute_bb_for_insn ();
43539 if (TARGET_SEH && current_function_has_exception_handlers ())
43540 ix86_seh_fixup_eh_fallthru ();
/* The padding passes are speed optimizations only.  */
43542 if (optimize && optimize_function_for_speed_p (cfun))
43544 if (TARGET_PAD_SHORT_FUNCTION)
43545 ix86_pad_short_function ();
43546 else if (TARGET_PAD_RETURNS)
43547 ix86_pad_returns ();
43548 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43549 if (TARGET_FOUR_JUMP_LIMIT)
43550 ix86_avoid_jump_mispredicts ();
43555 /* Return nonzero when QImode register that must be represented via REX prefix
/* Scans INSN's extracted operands for a general register that is not
   one of the legacy QImode-addressable registers (a/b/c/d).  */
43558 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
43561 extract_insn_cached (insn);
43562 for (i = 0; i < recog_data.n_operands; i++)
43563 if (GENERAL_REG_P (recog_data.operand[i])
43564 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
43569 /* Return true when INSN mentions register that must be encoded using REX
/* Walks every sub-rtx of INSN's pattern looking for a REX integer or
   REX SSE hard register (r8-r15 / xmm8-xmm15).  */
43572 x86_extended_reg_mentioned_p (rtx insn)
43574 subrtx_iterator::array_type array;
43575 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
43577 const_rtx x = *iter;
43579 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
43585 /* If profitable, negate (without causing overflow) integer constant
43586 of mode MODE at location LOC. Return true in this case. */
43588 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
43592 if (!CONST_INT_P (*loc))
43598 /* DImode x86_64 constants must fit in 32 bits. */
43599 gcc_assert (x86_64_immediate_operand (*loc, mode));
43610 gcc_unreachable ();
43613 /* Avoid overflows. */
/* The mode's sign-bit value has no representable negation.  */
43614 if (mode_signbit_p (mode, *loc))
43617 val = INTVAL (*loc);
43619 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
43620 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
43621 if ((val < 0 && val != -128)
43624 *loc = GEN_INT (-val);
43631 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
43632 optabs would emit if we didn't have TFmode patterns. */
/* For non-negative input a signed conversion suffices; for negative
   input, halve (with the low bit OR'd back to keep rounding correct),
   convert, then double.  */
43635 x86_emit_floatuns (rtx operands[2])
43637 rtx_code_label *neglab, *donelab;
43638 rtx i0, i1, f0, in, out;
43639 machine_mode mode, inmode;
43641 inmode = GET_MODE (operands[1]);
43642 gcc_assert (inmode == SImode || inmode == DImode);
43645 in = force_reg (inmode, operands[1]);
43646 mode = GET_MODE (out);
43647 neglab = gen_label_rtx ();
43648 donelab = gen_label_rtx ();
43649 f0 = gen_reg_rtx (mode);
/* Fast path: input is non-negative when treated as signed.  */
43651 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
43653 expand_float (out, in, 0);
43655 emit_jump_insn (gen_jump (donelab));
43658 emit_label (neglab);
/* Slow path: (in >> 1) | (in & 1), convert, then add to itself.  */
43660 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
43662 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
43664 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
43666 expand_float (f0, i0, 0);
43668 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
43670 emit_label (donelab);
43673 static bool canonicalize_perm (struct expand_vec_perm_d *d);
43674 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
43675 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
43676 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
43678 /* Get a vector mode of the same size as the original but with elements
43679 twice as wide. This is only guaranteed to apply to integral vectors. */
43681 static inline machine_mode
43682 get_mode_wider_vector (machine_mode o)
43684 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
43685 machine_mode n = GET_MODE_WIDER_MODE (o);
/* Sanity: half the element count, same total size.  */
43686 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
43687 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
43691 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
43692 fill target with val via vec_duplicate. */
/* Emits (set TARGET (vec_duplicate VAL)) and checks whether a pattern
   recognizes it; if not, retries with VAL forced into a register.
   Returns whether recognition ultimately succeeded.  */
43695 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
43701 /* First attempt to recognize VAL as-is. */
43702 dup = gen_rtx_VEC_DUPLICATE (mode, val);
43703 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
43704 if (recog_memoized (insn) < 0)
43707 /* If that fails, force VAL into a register. */
/* The register-forcing sequence must appear before the insn that
   uses it, hence the emit_insn_before of the collected sequence.  */
43710 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
43711 seq = get_insns ();
43714 emit_insn_before (seq, insn);
43716 ok = recog_memoized (insn) >= 0;
43722 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43723 with all elements equal to VAR. Return true if successful. */
/* Mode-dispatched broadcast expansion.  NOTE(review): this excerpt is
   elided — the switch/case labels and several guards are missing;
   the visible fragments are the per-mode strategies (direct
   vec_duplicate, pshuflw-style permutes, widen-and-recurse,
   concat of half-width broadcasts).  */
43726 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
43727 rtx target, rtx val)
43751 return ix86_vector_duplicate_value (mode, target, val);
43756 if (TARGET_SSE || TARGET_3DNOW_A)
/* Broadcast a 16-bit value via TRUNCATE + VEC_DUPLICATE.  */
43760 val = gen_lowpart (SImode, val);
43761 x = gen_rtx_TRUNCATE (HImode, val);
43762 x = gen_rtx_VEC_DUPLICATE (mode, x);
43763 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43775 return ix86_vector_duplicate_value (mode, target, val);
/* Fallback: build the broadcast as a one-operand permutation.  */
43779 struct expand_vec_perm_d dperm;
43783 memset (&dperm, 0, sizeof (dperm));
43784 dperm.target = target;
43785 dperm.vmode = mode;
43786 dperm.nelt = GET_MODE_NUNITS (mode);
43787 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
43788 dperm.one_operand_p = true;
43790 /* Extend to SImode using a paradoxical SUBREG. */
43791 tmp1 = gen_reg_rtx (SImode);
43792 emit_move_insn (tmp1, gen_lowpart (SImode, val));
43794 /* Insert the SImode value as low element of a V4SImode vector. */
43795 tmp2 = gen_reg_rtx (V4SImode);
43796 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
43797 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
43799 ok = (expand_vec_perm_1 (&dperm)
43800 || expand_vec_perm_broadcast_1 (&dperm));
43808 return ix86_vector_duplicate_value (mode, target, val);
43815 /* Replicate the value once into the next wider mode and recurse. */
43817 machine_mode smode, wsmode, wvmode;
43820 smode = GET_MODE_INNER (mode);
43821 wvmode = get_mode_wider_vector (mode);
43822 wsmode = GET_MODE_INNER (wvmode);
/* Pack two copies of VAL into one wider element: val | (val << bits).  */
43824 val = convert_modes (wsmode, smode, val, true);
43825 x = expand_simple_binop (wsmode, ASHIFT, val,
43826 GEN_INT (GET_MODE_BITSIZE (smode)),
43827 NULL_RTX, 1, OPTAB_LIB_WIDEN);
43828 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
43830 x = gen_reg_rtx (wvmode);
43831 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
43833 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
43840 return ix86_vector_duplicate_value (mode, target, val);
/* 256-bit QI/HI: broadcast into the half-width vector, then concat.  */
43843 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
43844 rtx x = gen_reg_rtx (hvmode);
43846 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43849 x = gen_rtx_VEC_CONCAT (mode, x, x);
43850 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43856 if (TARGET_AVX512BW)
43857 return ix86_vector_duplicate_value (mode, target, val);
/* 512-bit QI/HI without AVX512BW: same concat-of-halves trick.  */
43860 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
43861 rtx x = gen_reg_rtx (hvmode);
43863 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
43866 x = gen_rtx_VEC_CONCAT (mode, x, x);
43867 emit_insn (gen_rtx_SET (VOIDmode, target, x));
43876 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43877 whose ONE_VAR element is VAR, and other elements are zero. Return true
/* Strategy: use a real vector-set insn when the ISA has one for this
   mode; otherwise splat VAR into element 0 via VEC_MERGE with zero and
   shuffle it to position ONE_VAR; narrow element modes zero-extend to
   SImode and recurse.  NOTE(review): excerpt is elided — the switch
   labels and several guards are missing from this listing.  */
43881 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
43882 rtx target, rtx var, int one_var)
43884 machine_mode vsimode;
43887 bool use_vector_set = false;
43892 /* For SSE4.1, we normally use vector set. But if the second
43893 element is zero and inter-unit moves are OK, we use movq
43895 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
43896 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
43902 use_vector_set = TARGET_SSE4_1;
43905 use_vector_set = TARGET_SSE2;
43908 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
43915 use_vector_set = TARGET_AVX;
43918 /* Use ix86_expand_vector_set in 64bit mode only. */
43919 use_vector_set = TARGET_AVX && TARGET_64BIT;
43925 if (use_vector_set)
/* Zero the vector, then insert VAR at position ONE_VAR.  */
43927 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
43928 var = force_reg (GET_MODE_INNER (mode), var);
43929 ix86_expand_vector_set (mmx_ok, target, var, one_var);
/* V2 modes: concat VAR with a zero element.  */
43945 var = force_reg (GET_MODE_INNER (mode), var);
43946 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
43947 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* Work in a pseudo so the shuffle below has a register operand.  */
43952 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
43953 new_target = gen_reg_rtx (mode);
43955 new_target = target;
43956 var = force_reg (GET_MODE_INNER (mode), var);
43957 x = gen_rtx_VEC_DUPLICATE (mode, var);
43958 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
43959 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
43962 /* We need to shuffle the value to the correct position, so
43963 create a new pseudo to store the intermediate result. */
43965 /* With SSE2, we can use the integer shuffle insns. */
43966 if (mode != V4SFmode && TARGET_SSE2)
43968 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
43970 GEN_INT (one_var == 1 ? 0 : 1),
43971 GEN_INT (one_var == 2 ? 0 : 1),
43972 GEN_INT (one_var == 3 ? 0 : 1)));
43973 if (target != new_target)
43974 emit_move_insn (target, new_target);
43978 /* Otherwise convert the intermediate result to V4SFmode and
43979 use the SSE1 shuffle instructions. */
43980 if (mode != V4SFmode)
43982 tmp = gen_reg_rtx (V4SFmode);
43983 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
43988 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
43990 GEN_INT (one_var == 1 ? 0 : 1),
43991 GEN_INT (one_var == 2 ? 0+4 : 1+4),
43992 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
43994 if (mode != V4SFmode)
43995 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
43996 else if (tmp != target)
43997 emit_move_insn (target, tmp);
43999 else if (target != new_target)
44000 emit_move_insn (target, new_target);
44005 vsimode = V4SImode;
44011 vsimode = V2SImode;
44017 /* Zero extend the variable element to SImode and recurse. */
44018 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
44020 x = gen_reg_rtx (vsimode);
44021 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
44023 gcc_unreachable ();
44025 emit_move_insn (target, gen_lowpart (mode, x));
/* NOTE(review): sampled excerpt -- embedded original line numbers are
   non-contiguous, so interior lines are elided.  Code preserved verbatim;
   only comments added.  */
44033 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
44034 consisting of the values in VALS.  It is known that all elements
44035 except ONE_VAR are constants.  Return true if successful.  */
44038 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
44039 rtx target, rtx vals, int one_var)
44041 rtx var = XVECEXP (vals, 0, one_var);
44042 machine_mode wmode;
/* Build CONST_VEC: a copy of VALS with the variable slot zeroed, suitable
   for loading from the constant pool.  */
44045 const_vec = copy_rtx (vals);
44046 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
44047 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
44055 /* For the two element vectors, it's just as easy to use
44056 the general case.  */
44060 /* Use ix86_expand_vector_set in 64bit mode only.  */
44083 /* There's no way to set one QImode entry easily.  Combine
44084 the variable value with its adjacent constant value, and
44085 promote to an HImode set.  */
44086 x = XVECEXP (vals, 0, one_var ^ 1);
/* Pack VAR and its neighbour into one HImode value; which one lands in
   the high byte depends on the (elided) parity test on ONE_VAR.  */
44089 var = convert_modes (HImode, QImode, var, true);
44090 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
44091 NULL_RTX, 1, OPTAB_LIB_WIDEN);
44092 x = GEN_INT (INTVAL (x) & 0xff);
44096 var = convert_modes (HImode, QImode, var, true);
44097 x = gen_int_mode (INTVAL (x) << 8, HImode);
44099 if (x != const0_rtx)
44100 var = expand_simple_binop (HImode, IOR, var, x, var,
44101 1, OPTAB_LIB_WIDEN);
/* Load the constant part in the wider mode, then insert the combined
   HImode value at the halved index.  */
44103 x = gen_reg_rtx (wmode);
44104 emit_move_insn (x, gen_lowpart (wmode, const_vec));
44105 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
44107 emit_move_insn (target, gen_lowpart (mode, x));
/* Default path: load the constant vector, then overwrite the one
   variable element.  */
44114 emit_move_insn (target, const_vec);
44115 ix86_expand_vector_set (mmx_ok, target, var, one_var);
/* NOTE(review): sampled excerpt -- embedded original line numbers are
   non-contiguous, so interior lines (switch headers, case labels) are
   elided.  Code preserved verbatim; only comments added.  */
44119 /* A subroutine of ix86_expand_vector_init_general.  Use vector
44120 concatenate to handle the most general case: all values variable,
44121 and none identical.  */
44124 ix86_expand_vector_init_concat (machine_mode mode,
44125 rtx target, rtx *ops, int n)
44127 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
44128 rtx first[16], second[8], third[4];
44180 gcc_unreachable ();
/* n == 2: a single VEC_CONCAT of the two (forced-to-register) operands.  */
44183 if (!register_operand (ops[1], cmode))
44184 ops[1] = force_reg (cmode, ops[1]);
44185 if (!register_operand (ops[0], cmode))
44186 ops[0] = force_reg (cmode, ops[0]);
44187 emit_insn (gen_rtx_SET (VOIDmode, target,
44188 gen_rtx_VEC_CONCAT (mode, ops[0],
44208 gcc_unreachable ();
44232 gcc_unreachable ();
44250 gcc_unreachable ();
44255 /* FIXME: We process inputs backward to help RA.  PR 36222.  */
/* Pair up adjacent operands into half-width vectors (FIRST[]), then
   recursively concatenate through SECOND[]/THIRD[] until one vector of
   MODE remains.  */
44258 for (; i > 0; i -= 2, j--)
44260 first[j] = gen_reg_rtx (cmode);
44261 v = gen_rtvec (2, ops[i - 1], ops[i]);
44262 ix86_expand_vector_init (false, first[j],
44263 gen_rtx_PARALLEL (cmode, v));
/* n == 8 (quarter-width) recursion level.  */
44269 gcc_assert (hmode != VOIDmode);
44270 gcc_assert (gmode != VOIDmode);
44271 for (i = j = 0; i < n; i += 2, j++)
44273 second[j] = gen_reg_rtx (hmode);
44274 ix86_expand_vector_init_concat (hmode, second [j],
44278 for (i = j = 0; i < n; i += 2, j++)
44280 third[j] = gen_reg_rtx (gmode);
44281 ix86_expand_vector_init_concat (gmode, third[j],
44285 ix86_expand_vector_init_concat (mode, target, third, n);
/* n == 4 (half-width) recursion level.  */
44289 gcc_assert (hmode != VOIDmode);
44290 for (i = j = 0; i < n; i += 2, j++)
44292 second[j] = gen_reg_rtx (hmode);
44293 ix86_expand_vector_init_concat (hmode, second [j],
44297 ix86_expand_vector_init_concat (mode, target, second, n);
44300 ix86_expand_vector_init_concat (mode, target, first, n);
44304 gcc_unreachable ();
/* NOTE(review): sampled excerpt -- embedded original line numbers are
   non-contiguous, so interior lines are elided.  Code preserved verbatim;
   only comments added.  */
44308 /* A subroutine of ix86_expand_vector_init_general.  Use vector
44309 interleave to handle the most general case: all values variable,
44310 and none identical.  */
44313 ix86_expand_vector_init_interleave (machine_mode mode,
44314 rtx target, rtx *ops, int n)
44316 machine_mode first_imode, second_imode, third_imode, inner_mode;
44319 rtx (*gen_load_even) (rtx, rtx, rtx);
44320 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
44321 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
/* Per-mode generator selection (case labels elided): V8HI path ...  */
44326 gen_load_even = gen_vec_setv8hi;
44327 gen_interleave_first_low = gen_vec_interleave_lowv4si;
44328 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44329 inner_mode = HImode;
44330 first_imode = V4SImode;
44331 second_imode = V2DImode;
44332 third_imode = VOIDmode;
/* ... and V16QI path.  */
44335 gen_load_even = gen_vec_setv16qi;
44336 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
44337 gen_interleave_second_low = gen_vec_interleave_lowv4si;
44338 inner_mode = QImode;
44339 first_imode = V8HImode;
44340 second_imode = V4SImode;
44341 third_imode = V2DImode;
44344 gcc_unreachable ();
44347 for (i = 0; i < n; i++)
44349 /* Extend the odd elment to SImode using a paradoxical SUBREG.  */
44350 op0 = gen_reg_rtx (SImode);
44351 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
44353 /* Insert the SImode value as low element of V4SImode vector.  */
44354 op1 = gen_reg_rtx (V4SImode);
44355 op0 = gen_rtx_VEC_MERGE (V4SImode,
44356 gen_rtx_VEC_DUPLICATE (V4SImode,
44358 CONST0_RTX (V4SImode),
44360 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
44362 /* Cast the V4SImode vector back to a vector in orignal mode.  */
44363 op0 = gen_reg_rtx (mode);
44364 emit_move_insn (op0, gen_lowpart (mode, op1));
44366 /* Load even elements into the second position.  */
44367 emit_insn (gen_load_even (op0,
44368 force_reg (inner_mode,
44372 /* Cast vector to FIRST_IMODE vector.  */
44373 ops[i] = gen_reg_rtx (first_imode);
44374 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
44377 /* Interleave low FIRST_IMODE vectors.  */
44378 for (i = j = 0; i < n; i += 2, j++)
44380 op0 = gen_reg_rtx (first_imode);
44381 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
44383 /* Cast FIRST_IMODE vector to SECOND_IMODE vector.  */
44384 ops[j] = gen_reg_rtx (second_imode);
44385 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
44388 /* Interleave low SECOND_IMODE vectors.  */
44389 switch (second_imode)
44392 for (i = j = 0; i < n / 2; i += 2, j++)
44394 op0 = gen_reg_rtx (second_imode);
44395 emit_insn (gen_interleave_second_low (op0, ops[i],
44398 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
44400 ops[j] = gen_reg_rtx (third_imode);
44401 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
/* After the V4SI round, fall through to one final V2DI interleave.  */
44403 second_imode = V2DImode;
44404 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44408 op0 = gen_reg_rtx (second_imode);
44409 emit_insn (gen_interleave_second_low (op0, ops[0],
44412 /* Cast the SECOND_IMODE vector back to a vector on original
44414 emit_insn (gen_rtx_SET (VOIDmode, target,
44415 gen_lowpart (mode, op0)));
44419 gcc_unreachable ();
/* NOTE(review): sampled excerpt -- embedded original line numbers are
   non-contiguous, so interior lines (switch/case structure, returns) are
   elided.  Code preserved verbatim; only comments added.  */
44423 /* A subroutine of ix86_expand_vector_init.  Handle the most general case:
44424 all values variable, and none identical.  */
44427 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
44428 rtx target, rtx vals)
44430 rtx ops[64], op0, op1, op2, op3, op4, op5;
44431 machine_mode half_mode = VOIDmode;
44432 machine_mode quarter_mode = VOIDmode;
44439 if (!mmx_ok && !TARGET_SSE)
/* Concat strategy: gather all elements and let the concat helper pair
   them up recursively.  */
44455 n = GET_MODE_NUNITS (mode);
44456 for (i = 0; i < n; i++)
44457 ops[i] = XVECEXP (vals, 0, i);
44458 ix86_expand_vector_init_concat (mode, target, ops, n);
/* 256-bit QI/HI vectors: build two half-width vectors by interleave,
   then concatenate.  */
44462 half_mode = V16QImode;
44466 half_mode = V8HImode;
44470 n = GET_MODE_NUNITS (mode);
44471 for (i = 0; i < n; i++)
44472 ops[i] = XVECEXP (vals, 0, i);
44473 op0 = gen_reg_rtx (half_mode);
44474 op1 = gen_reg_rtx (half_mode);
44475 ix86_expand_vector_init_interleave (half_mode, op0, ops,
44477 ix86_expand_vector_init_interleave (half_mode, op1,
44478 &ops [n >> 1], n >> 2);
44479 emit_insn (gen_rtx_SET (VOIDmode, target,
44480 gen_rtx_VEC_CONCAT (mode, op0, op1)));
/* 512-bit QI/HI vectors: four quarter-width interleaves, concatenated
   pairwise into halves, then into the full vector.  */
44484 quarter_mode = V16QImode;
44485 half_mode = V32QImode;
44489 quarter_mode = V8HImode;
44490 half_mode = V16HImode;
44494 n = GET_MODE_NUNITS (mode);
44495 for (i = 0; i < n; i++)
44496 ops[i] = XVECEXP (vals, 0, i);
44497 op0 = gen_reg_rtx (quarter_mode);
44498 op1 = gen_reg_rtx (quarter_mode);
44499 op2 = gen_reg_rtx (quarter_mode);
44500 op3 = gen_reg_rtx (quarter_mode);
44501 op4 = gen_reg_rtx (half_mode);
44502 op5 = gen_reg_rtx (half_mode);
44503 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
44505 ix86_expand_vector_init_interleave (quarter_mode, op1,
44506 &ops [n >> 2], n >> 3);
44507 ix86_expand_vector_init_interleave (quarter_mode, op2,
44508 &ops [n >> 1], n >> 3);
44509 ix86_expand_vector_init_interleave (quarter_mode, op3,
44510 &ops [(n >> 1) | (n >> 2)], n >> 3);
44511 emit_insn (gen_rtx_SET (VOIDmode, op4,
44512 gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
44513 emit_insn (gen_rtx_SET (VOIDmode, op5,
44514 gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
44515 emit_insn (gen_rtx_SET (VOIDmode, target,
44516 gen_rtx_VEC_CONCAT (mode, op4, op5)));
44520 if (!TARGET_SSE4_1)
44528 /* Don't use ix86_expand_vector_init_interleave if we can't
44529 move from GPR to SSE register directly.  */
44530 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
44533 n = GET_MODE_NUNITS (mode);
44534 for (i = 0; i < n; i++)
44535 ops[i] = XVECEXP (vals, 0, i);
44536 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
44544 gcc_unreachable ();
/* Word-building fallback: pack sub-word elements into word_mode words
   with shift/or, then assemble the words into the vector.  */
44548 int i, j, n_elts, n_words, n_elt_per_word;
44549 machine_mode inner_mode;
44550 rtx words[4], shift;
44552 inner_mode = GET_MODE_INNER (mode);
44553 n_elts = GET_MODE_NUNITS (mode);
44554 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
44555 n_elt_per_word = n_elts / n_words;
44556 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
44558 for (i = 0; i < n_words; ++i)
44560 rtx word = NULL_RTX;
44562 for (j = 0; j < n_elt_per_word; ++j)
44564 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
44565 elt = convert_modes (word_mode, inner_mode, elt, true);
44571 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
44572 word, 1, OPTAB_LIB_WIDEN);
44573 word = expand_simple_binop (word_mode, IOR, word, elt,
44574 word, 1, OPTAB_LIB_WIDEN);
44582 emit_move_insn (target, gen_lowpart (mode, words[0]));
44583 else if (n_words == 2)
44585 rtx tmp = gen_reg_rtx (mode);
44586 emit_clobber (tmp);
44587 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
44588 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
44589 emit_move_insn (target, tmp);
44591 else if (n_words == 4)
44593 rtx tmp = gen_reg_rtx (V4SImode);
44594 gcc_assert (word_mode == SImode);
44595 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
44596 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
44597 emit_move_insn (target, gen_lowpart (mode, tmp));
44600 gcc_unreachable ();
/* NOTE(review): sampled excerpt -- embedded original line numbers are
   non-contiguous, so interior lines are elided.  Code preserved verbatim;
   only comments added.  */
44604 /* Initialize vector TARGET via VALS.  Suppress the use of MMX
44605 instructions unless MMX_OK is true.  */
44608 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
44610 machine_mode mode = GET_MODE (target);
44611 machine_mode inner_mode = GET_MODE_INNER (mode);
44612 int n_elts = GET_MODE_NUNITS (mode);
44613 int n_var = 0, one_var = -1;
44614 bool all_same = true, all_const_zero = true;
/* Classify the elements: count variables, remember the last variable
   index, and detect the all-zero and all-identical cases.  */
44618 for (i = 0; i < n_elts; ++i)
44620 x = XVECEXP (vals, 0, i);
44621 if (!(CONST_INT_P (x)
44622 || GET_CODE (x) == CONST_DOUBLE
44623 || GET_CODE (x) == CONST_FIXED))
44624 n_var++, one_var = i;
44625 else if (x != CONST0_RTX (inner_mode))
44626 all_const_zero = false;
44627 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
44631 /* Constants are best loaded from the constant pool.  */
44634 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
44638 /* If all values are identical, broadcast the value.  */
44640 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
44641 XVECEXP (vals, 0, 0)))
44644 /* Values where only one field is non-constant are best loaded from
44645 the pool and overwritten via move later.  */
44649 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
44650 XVECEXP (vals, 0, one_var),
44654 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
/* Fallback: fully general expansion.  */
44658 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* NOTE(review): sampled excerpt -- embedded original line numbers are
   non-contiguous, so interior lines (the switch header, case labels, many
   braces) are elided.  Code preserved verbatim; only comments added.
   Purpose: store scalar VAL into element ELT of vector TARGET, choosing a
   mode-specific strategy.  */
44662 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
44664 machine_mode mode = GET_MODE (target);
44665 machine_mode inner_mode = GET_MODE_INNER (mode);
44666 machine_mode half_mode;
44667 bool use_vec_merge = false;
/* Tables of lo/hi half extract and insert generators, indexed by a
   mode row J and half index I, used by the 256-bit path below.  */
44669 static rtx (*gen_extract[6][2]) (rtx, rtx)
44671 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
44672 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
44673 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
44674 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
44675 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
44676 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
44678 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
44680 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
44681 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
44682 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
44683 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
44684 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
44685 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
/* Two-element vectors: extract the other element and VEC_CONCAT it with
   VAL in the right order.  */
44695 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44696 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
44698 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44700 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44701 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44707 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
44711 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44712 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
44714 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44716 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44717 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44724 /* For the two element vectors, we implement a VEC_CONCAT with
44725 the extraction of the other element.  */
44727 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
44728 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
44731 op0 = val, op1 = tmp;
44733 op0 = tmp, op1 = val;
44735 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
44736 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44741 use_vec_merge = TARGET_SSE4_1;
44748 use_vec_merge = true;
/* V4SF without SSE4.1: shufps-based insertion; the (elided) case labels
   select among the ELT positions.  */
44752 /* tmp = target = A B C D  */
44753 tmp = copy_to_reg (target);
44754 /* target = A A B B  */
44755 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
44756 /* target = X A B B  */
44757 ix86_expand_vector_set (false, target, val, 0);
44758 /* target = A X C D  */
44759 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44760 const1_rtx, const0_rtx,
44761 GEN_INT (2+4), GEN_INT (3+4)));
44765 /* tmp = target = A B C D  */
44766 tmp = copy_to_reg (target);
44767 /* tmp = X B C D  */
44768 ix86_expand_vector_set (false, tmp, val, 0);
44769 /* target = A B X D  */
44770 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44771 const0_rtx, const1_rtx,
44772 GEN_INT (0+4), GEN_INT (3+4)));
44776 /* tmp = target = A B C D  */
44777 tmp = copy_to_reg (target);
44778 /* tmp = X B C D  */
44779 ix86_expand_vector_set (false, tmp, val, 0);
44780 /* target = A B X D  */
44781 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44782 const0_rtx, const1_rtx,
44783 GEN_INT (2+4), GEN_INT (0+4)));
44787 gcc_unreachable ();
44792 use_vec_merge = TARGET_SSE4_1;
44796 /* Element 0 handled by vec_merge below.  */
44799 use_vec_merge = true;
44805 /* With SSE2, use integer shuffles to swap element 0 and ELT,
44806 store into element 0, then shuffle them back.  */
44810 order[0] = GEN_INT (elt);
44811 order[1] = const1_rtx;
44812 order[2] = const2_rtx;
44813 order[3] = GEN_INT (3);
44814 order[elt] = const0_rtx;
44816 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44817 order[1], order[2], order[3]));
44819 ix86_expand_vector_set (false, target, val, 0);
44821 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
44822 order[1], order[2], order[3]));
44826 /* For SSE1, we have to reuse the V4SF code.  */
44827 rtx t = gen_reg_rtx (V4SFmode);
44828 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
44829 emit_move_insn (target, gen_lowpart (mode, t));
44834 use_vec_merge = TARGET_SSE2;
44837 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
44841 use_vec_merge = TARGET_SSE4_1;
/* 256-bit modes: pick the half_mode and gen_extract/gen_insert row.  */
44848 half_mode = V16QImode;
44854 half_mode = V8HImode;
44860 half_mode = V4SImode;
44866 half_mode = V2DImode;
44872 half_mode = V4SFmode;
44878 half_mode = V2DFmode;
44884 /* Compute offset.  */
44888 gcc_assert (i <= 1);
44890 /* Extract the half.  */
44891 tmp = gen_reg_rtx (half_mode);
44892 emit_insn (gen_extract[j][i] (tmp, target));
44894 /* Put val in tmp at elt.  */
44895 ix86_expand_vector_set (false, tmp, val, elt);
44898 emit_insn (gen_insert[j][i] (target, target, tmp));
/* 512-bit modes: broadcast VAL and blend it in under a one-hot mask.  */
44902 if (TARGET_AVX512F)
44904 tmp = gen_reg_rtx (mode);
44905 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44906 gen_rtx_VEC_DUPLICATE (mode, val)));
44907 emit_insn (gen_avx512f_blendmv8df (target, tmp, target,
44908 force_reg (QImode, GEN_INT (1 << elt))));
44914 if (TARGET_AVX512F)
44916 tmp = gen_reg_rtx (mode);
44917 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44918 gen_rtx_VEC_DUPLICATE (mode, val)));
44919 emit_insn (gen_avx512f_blendmv8di (target, tmp, target,
44920 force_reg (QImode, GEN_INT (1 << elt))));
44926 if (TARGET_AVX512F)
44928 tmp = gen_reg_rtx (mode);
44929 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44930 gen_rtx_VEC_DUPLICATE (mode, val)));
44931 emit_insn (gen_avx512f_blendmv16sf (target, tmp, target,
44932 force_reg (HImode, GEN_INT (1 << elt))));
44938 if (TARGET_AVX512F)
44940 tmp = gen_reg_rtx (mode);
44941 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44942 gen_rtx_VEC_DUPLICATE (mode, val)));
44943 emit_insn (gen_avx512f_blendmv16si (target, tmp, target,
44944 force_reg (HImode, GEN_INT (1 << elt))));
44950 if (TARGET_AVX512F && TARGET_AVX512BW)
44952 tmp = gen_reg_rtx (mode);
44953 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44954 gen_rtx_VEC_DUPLICATE (mode, val)));
44955 emit_insn (gen_avx512bw_blendmv32hi (target, tmp, target,
44956 force_reg (SImode, GEN_INT (1 << elt))));
44962 if (TARGET_AVX512F && TARGET_AVX512BW)
44964 tmp = gen_reg_rtx (mode);
44965 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44966 gen_rtx_VEC_DUPLICATE (mode, val)));
44967 emit_insn (gen_avx512bw_blendmv64qi (target, tmp, target,
44968 force_reg (DImode, GEN_INT (1 << elt))));
/* Generic vec_merge path for the use_vec_merge cases above.  */
44980 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
44981 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
44982 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Last resort: bounce the vector through a stack slot and store the
   element with a scalar move.  */
44986 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
44988 emit_move_insn (mem, target);
44990 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
44991 emit_move_insn (tmp, val);
44993 emit_move_insn (target, mem);
/* NOTE(review): sampled excerpt -- embedded original line numbers are
   non-contiguous, so interior lines (switch header, case labels) are
   elided.  Code preserved verbatim; only comments added.
   Purpose: extract element ELT of vector VEC into scalar TARGET.  */
44998 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
45000 machine_mode mode = GET_MODE (vec);
45001 machine_mode inner_mode = GET_MODE_INNER (mode);
45002 bool use_vec_extr = false;
45015 use_vec_extr = true;
45019 use_vec_extr = TARGET_SSE4_1;
/* V4SF: shuffle the wanted element into position 0, then fall through to
   the vec_select path.  */
45031 tmp = gen_reg_rtx (mode);
45032 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
45033 GEN_INT (elt), GEN_INT (elt),
45034 GEN_INT (elt+4), GEN_INT (elt+4)));
45038 tmp = gen_reg_rtx (mode);
45039 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
45043 gcc_unreachable ();
45046 use_vec_extr = true;
45051 use_vec_extr = TARGET_SSE4_1;
/* V4SI: same idea using integer pshufd.  */
45065 tmp = gen_reg_rtx (mode);
45066 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
45067 GEN_INT (elt), GEN_INT (elt),
45068 GEN_INT (elt), GEN_INT (elt)));
45072 tmp = gen_reg_rtx (mode);
45073 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
45077 gcc_unreachable ();
45080 use_vec_extr = true;
45085 /* For SSE1, we have to reuse the V4SF code.  */
45086 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
45087 gen_lowpart (V4SFmode, vec), elt);
45093 use_vec_extr = TARGET_SSE2;
45096 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
45100 use_vec_extr = TARGET_SSE4_1;
/* 256-bit modes: extract the lo or hi 128-bit half (by ELT's high bit)
   and recurse with the masked element index.  */
45106 tmp = gen_reg_rtx (V4SFmode);
45108 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
45110 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
45111 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45119 tmp = gen_reg_rtx (V2DFmode);
45121 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
45123 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
45124 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45132 tmp = gen_reg_rtx (V16QImode);
45134 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
45136 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
45137 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45145 tmp = gen_reg_rtx (V8HImode);
45147 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
45149 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
45150 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45158 tmp = gen_reg_rtx (V4SImode);
45160 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
45162 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
45163 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45171 tmp = gen_reg_rtx (V2DImode);
45173 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
45175 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
45176 ix86_expand_vector_extract (false, target, tmp, elt & 1);
/* 512-bit modes: same halving strategy, gated on AVX-512BW for QI/HI.  */
45182 if (TARGET_AVX512BW)
45184 tmp = gen_reg_rtx (V16HImode);
45186 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
45188 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
45189 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45195 if (TARGET_AVX512BW)
45197 tmp = gen_reg_rtx (V32QImode);
45199 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
45201 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
45202 ix86_expand_vector_extract (false, target, tmp, elt & 31);
45208 tmp = gen_reg_rtx (V8SFmode);
45210 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
45212 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
45213 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45217 tmp = gen_reg_rtx (V4DFmode);
45219 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
45221 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
45222 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45226 tmp = gen_reg_rtx (V8SImode);
45228 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
45230 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
45231 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45235 tmp = gen_reg_rtx (V4DImode);
45237 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
45239 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
45240 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45244 /* ??? Could extract the appropriate HImode element and shift.  */
/* vec_select path for the use_vec_extr cases.  */
45251 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
45252 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
45254 /* Let the rtl optimizers know about the zero extension performed.  */
45255 if (inner_mode == QImode || inner_mode == HImode)
45257 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
45258 target = gen_lowpart (SImode, target);
45261 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Last resort: spill the vector to a stack slot and load the element.  */
45265 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45267 emit_move_insn (mem, vec);
45269 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45270 emit_move_insn (target, tmp);
/* NOTE(review): sampled excerpt -- embedded original line numbers are
   non-contiguous, so interior lines (case labels, braces) are elided.
   Code preserved verbatim; only comments added.  */
45274 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
45275 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
45276 The upper bits of DEST are undefined, though they shouldn't cause
45277 exceptions (some bits from src or all zeros are ok).  */
45280 emit_reduc_half (rtx dest, rtx src, int i)
45283 switch (GET_MODE (src))
/* V4SF: movhlps for the 64-bit half, shufps for the 32-bit step.  */
45287 tem = gen_sse_movhlps (dest, src, src);
45289 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
45290 GEN_INT (1 + 4), GEN_INT (1 + 4));
45293 tem = gen_vec_interleave_highv2df (dest, src, src);
/* 128-bit integer modes: shift the whole V1TI register right.  */
45299 d = gen_reg_rtx (V1TImode);
45300 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
/* 256-bit float modes: vperm2f128 for the 128-bit step, in-lane
   shuffles for the smaller steps.  */
45305 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
45307 tem = gen_avx_shufps256 (dest, src, src,
45308 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
45312 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
45314 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
/* 256-bit integer modes: vpermq / 128-bit shift via V2TI.  */
45322 if (GET_MODE (dest) != V4DImode)
45323 d = gen_reg_rtx (V4DImode);
45324 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
45325 gen_lowpart (V4DImode, src),
45330 d = gen_reg_rtx (V2TImode);
45331 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
/* 512-bit modes: shuf_i32x4 for the 256/512-bit steps, pshufd below
   for the narrower steps.  */
45342 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
45343 gen_lowpart (V16SImode, src),
45344 gen_lowpart (V16SImode, src),
45345 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
45346 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
45347 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
45348 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
45349 GEN_INT (0xC), GEN_INT (0xD),
45350 GEN_INT (0xE), GEN_INT (0xF),
45351 GEN_INT (0x10), GEN_INT (0x11),
45352 GEN_INT (0x12), GEN_INT (0x13),
45353 GEN_INT (0x14), GEN_INT (0x15),
45354 GEN_INT (0x16), GEN_INT (0x17));
45356 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
45357 gen_lowpart (V16SImode, src),
45358 GEN_INT (i == 128 ? 0x2 : 0x1),
45362 GEN_INT (i == 128 ? 0x6 : 0x5),
45366 GEN_INT (i == 128 ? 0xA : 0x9),
45370 GEN_INT (i == 128 ? 0xE : 0xD),
45376 gcc_unreachable ();
/* Copy the shifted intermediate D back into DEST when one was used.  */
45380 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
/* NOTE(review): sampled excerpt -- interior lines elided (embedded line
   numbers non-contiguous).  Code preserved verbatim; comments added.  */
45383 /* Expand a vector reduction.  FN is the binary pattern to reduce;
45384 DEST is the destination; IN is the input vector.  */
45387 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
45389 rtx half, dst, vec = in;
45390 machine_mode mode = GET_MODE (in);
45393 /* SSE4 has a special instruction for V8HImode UMIN reduction.  */
45395 && mode == V8HImode
45396 && fn == gen_uminv8hi3)
45398 emit_insn (gen_sse4_1_phminposuw (dest, in));
/* Halve the active width each iteration, combining the two halves with
   FN, until one element remains.  */
45402 for (i = GET_MODE_BITSIZE (mode);
45403 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
45406 half = gen_reg_rtx (mode);
45407 emit_reduc_half (half, vec, i);
45408 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
45411 dst = gen_reg_rtx (mode);
45412 emit_insn (fn (dst, half, vec));
/* NOTE(review): sampled excerpt -- some lines elided; code preserved
   verbatim, comments added.  */
45417 /* Target hook for scalar_mode_supported_p.  */
45419 ix86_scalar_mode_supported_p (machine_mode mode)
/* Decimal FP defers to the generic predicate; TFmode has its own
   (elided) answer; everything else uses the default.  */
45421 if (DECIMAL_FLOAT_MODE_P (mode))
45422 return default_decimal_float_supported_p ();
45423 else if (mode == TFmode)
45426 return default_scalar_mode_supported_p (mode);
/* NOTE(review): sampled excerpt -- the `return true;` lines between the
   tests are elided; code preserved verbatim, comments added.  */
45429 /* Implements target hook vector_mode_supported_p.  */
45431 ix86_vector_mode_supported_p (machine_mode mode)
/* A vector mode is supported if any enabled ISA level validates it.  */
45433 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
45435 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
45437 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
45439 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
45441 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
45443 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
/* NOTE(review): sampled excerpt -- the body's condition and default
   branches are elided; code preserved verbatim, comments added.  */
45448 /* Implement target hook libgcc_floating_mode_supported_p.  */
45450 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
/* TFmode support in libgcc is configurable per target OS.  */
45460 #ifdef IX86_NO_LIBGCC_TFMODE
45462 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
45463 return TARGET_LONG_DOUBLE_128;
/* NOTE(review): sampled excerpt -- the function body is almost entirely
   elided here; only the signature is visible.  Code preserved verbatim.  */
45473 /* Target hook for c_mode_for_suffix.  */
45474 static machine_mode
45475 ix86_c_mode_for_suffix (char suffix)
/* NOTE(review): sampled excerpt -- some lines (second tree_cons argument,
   return) are elided; code preserved verbatim, comments added.  */
45485 /* Worker function for TARGET_MD_ASM_CLOBBERS.
45487 We do this in the new i386 backend to maintain source compatibility
45488 with the old cc0-based compiler.  */
45491 ix86_md_asm_clobbers (tree, tree, tree clobbers)
/* Implicitly clobber the flags and x87 status registers for every asm.  */
45493 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
45495 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
/* NOTE(review): sampled excerpt; code preserved verbatim, comments added.  */
45500 /* Implements target vector targetm.asm.encode_section_info.  */
45502 static void ATTRIBUTE_UNUSED
45503 ix86_encode_section_info (tree decl, rtx rtl, int first)
45505 default_encode_section_info (decl, rtl, first);
/* Mark symbols living in the large data section so addressing code can
   use far addresses for them.  */
45507 if (ix86_in_large_data_p (decl))
45508 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
/* NOTE(review): sampled excerpt; code preserved verbatim, comments added.  */
45511 /* Worker function for REVERSE_CONDITION.  */
45514 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
/* FP compare modes must account for unordered results when reversing.  */
45516 return (mode != CCFPmode && mode != CCFPUmode
45517 ? reverse_condition (code)
45518 : reverse_condition_maybe_unordered (code));
/* NOTE(review): sampled excerpt -- some branches/returns elided; code
   preserved verbatim, comments added.  */
45521 /* Output code to perform an x87 FP register move, from OPERANDS[1]
45525 output_387_reg_move (rtx insn, rtx *operands)
45527 if (REG_P (operands[0]))
/* reg <- dying reg: pop the source (ffreep or fstp).  */
45529 if (REG_P (operands[1])
45530 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45532 if (REGNO (operands[0]) == FIRST_STACK_REG)
45533 return output_387_ffreep (operands, 0);
45534 return "fstp\t%y0";
45536 if (STACK_TOP_P (operands[0]))
45537 return "fld%Z1\t%y1";
45540 else if (MEM_P (operands[0]))
/* mem <- reg: use the popping store when the source register dies.  */
45542 gcc_assert (REG_P (operands[1]));
45543 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45544 return "fstp%Z0\t%y0";
45547 /* There is no non-popping store to memory for XFmode.
45548 So if we need one, follow the store with a load.  */
45549 if (GET_MODE (operands[0]) == XFmode)
45550 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
45552 return "fst%Z0\t%y0";
/* NOTE(review): sampled excerpt -- some lines elided; code preserved
   verbatim, comments added.  */
45559 /* Output code to perform a conditional jump to LABEL, if C2 flag in
45560 FP status register is set.  */
45563 ix86_emit_fp_unordered_jump (rtx label)
45565 rtx reg = gen_reg_rtx (HImode);
/* Read the x87 status word into REG.  */
45568 emit_insn (gen_x86_fnstsw_1 (reg));
45570 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
/* sahf path: transfer AH into EFLAGS and test the unordered condition.  */
45572 emit_insn (gen_x86_sahf_1 (reg));
45574 temp = gen_rtx_REG (CCmode, FLAGS_REG);
45575 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
/* Fallback: test the C2 bit (0x04) of the status word directly.  */
45579 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
45581 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
45582 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
45585 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
45586 gen_rtx_LABEL_REF (VOIDmode, label),
45588 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
45590 emit_jump_insn (temp);
/* Branch predicted mostly not-taken (10%).  */
45591 predict_jump (REG_BR_PROB_BASE * 10 / 100);
/* NOTE(review): sampled excerpt -- some lines elided; code preserved
   verbatim, comments added.  */
45594 /* Output code to perform a log1p XFmode calculation.  */
45596 void ix86_emit_i387_log1p (rtx op0, rtx op1)
45598 rtx_code_label *label1 = gen_label_rtx ();
45599 rtx_code_label *label2 = gen_label_rtx ();
45601 rtx tmp = gen_reg_rtx (XFmode);
45602 rtx tmp2 = gen_reg_rtx (XFmode);
/* If |op1| >= 1 - sqrt(2)/2 the fyl2xp1 accuracy guarantee does not
   apply, so branch to the fyl2x(1 + op1) fallback.  */
45605 emit_insn (gen_absxf2 (tmp, op1));
45606 test = gen_rtx_GE (VOIDmode, tmp,
45607 CONST_DOUBLE_FROM_REAL_VALUE (
45608 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
45610 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
/* Small-argument path: op0 = ln(2) * log2(1 + op1) via fyl2xp1.  */
45612 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45613 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
45614 emit_jump (label2);
/* Large-argument path: op0 = ln(2) * log2(1 + op1) via add + fyl2x.  */
45616 emit_label (label1);
45617 emit_move_insn (tmp, CONST1_RTX (XFmode));
45618 emit_insn (gen_addxf3 (tmp, op1, tmp));
45619 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45620 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
45622 emit_label (label2);
45625 /* Emit code for round calculation. */
/* Expand round (OP1) into OP0 on the i387 as
   round(a) = sgn(a) * floor(fabs(a) + 0.5).  INMODE is the FP input
   mode (SF/DF/XF); OUTMODE may be FP or integer (HI/SI/DI).  */
45626 void ix86_emit_i387_round (rtx op0, rtx op1)
45628 machine_mode inmode = GET_MODE (op1);
45629 machine_mode outmode = GET_MODE (op0);
45630 rtx e1, e2, res, tmp, tmp1, half;
45631 rtx scratch = gen_reg_rtx (HImode);
45632 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
45633 rtx_code_label *jump_label = gen_label_rtx ();
45635 rtx (*gen_abs) (rtx, rtx);
45636 rtx (*gen_neg) (rtx, rtx);
/* Pick the abs generator for the input mode (switch cases per mode).  */
45641 gen_abs = gen_abssf2;
45644 gen_abs = gen_absdf2;
45647 gen_abs = gen_absxf2;
45650 gcc_unreachable ();
/* Pick the negate generator for the output mode (FP or integer).  */
45656 gen_neg = gen_negsf2;
45659 gen_neg = gen_negdf2;
45662 gen_neg = gen_negxf2;
45665 gen_neg = gen_neghi2;
45668 gen_neg = gen_negsi2;
45671 gen_neg = gen_negdi2;
45674 gcc_unreachable ();
45677 e1 = gen_reg_rtx (inmode);
45678 e2 = gen_reg_rtx (inmode);
45679 res = gen_reg_rtx (outmode);
45681 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
45683 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
/* scratch = fxam(op1): examine OP1 so its sign bit can be tested later
   without disturbing the value (works for -0.0 too).  */
45685 /* scratch = fxam(op1) */
45686 emit_insn (gen_rtx_SET (VOIDmode, scratch,
45687 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
45689 /* e1 = fabs(op1) */
45690 emit_insn (gen_abs (e1, op1));
45692 /* e2 = e1 + 0.5 */
45693 half = force_reg (inmode, half);
45694 emit_insn (gen_rtx_SET (VOIDmode, e2,
45695 gen_rtx_PLUS (inmode, e1, half)));
/* Floor is done in XFmode; extend first when the input is narrower.  */
45697 /* res = floor(e2) */
45698 if (inmode != XFmode)
45700 tmp1 = gen_reg_rtx (XFmode);
45702 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
45703 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
/* FP output narrower than XFmode: floor in XF, then truncate via a
   no-op-truncation unspec.  */
45713 rtx tmp0 = gen_reg_rtx (XFmode);
45715 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
45717 emit_insn (gen_rtx_SET (VOIDmode, res,
45718 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
45719 UNSPEC_TRUNC_NOOP)));
/* XFmode output: floor directly into RES.  */
45723 emit_insn (gen_frndintxf2_floor (res, tmp1));
/* Integer outputs: floor-and-convert in one insn per width.  */
45726 emit_insn (gen_lfloorxfhi2 (res, tmp1));
45729 emit_insn (gen_lfloorxfsi2 (res, tmp1));
45732 emit_insn (gen_lfloorxfdi2 (res, tmp1));
45735 gcc_unreachable ();
/* Test the fxam sign bit (mask 0x02) saved in SCRATCH.  */
45738 /* flags = signbit(a) */
45739 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
45741 /* if (flags) then res = -res */
45742 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
45743 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
45744 gen_rtx_LABEL_REF (VOIDmode, jump_label),
45746 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
/* No bias either way: predict 50/50.  */
45747 predict_jump (REG_BR_PROB_BASE * 50 / 100);
45748 JUMP_LABEL (insn) = jump_label;
45750 emit_insn (gen_neg (res, res));
45752 emit_label (jump_label);
45753 LABEL_NUSES (jump_label) = 1;
45755 emit_move_insn (op0, res);
45758 /* Output code to perform a Newton-Raphson approximation of a single precision
45759 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
/* Expand RES = A / B in MODE via a Newton-Raphson refinement of the
   hardware reciprocal estimate:
   a/b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp(b))).  */
45761 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
45763 rtx x0, x1, e0, e1;
45765 x0 = gen_reg_rtx (mode);
45766 e0 = gen_reg_rtx (mode);
45767 e1 = gen_reg_rtx (mode);
45768 x1 = gen_reg_rtx (mode);
45770 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
45772 b = force_reg (mode, b);
/* 512-bit modes use the AVX-512 rcp14 unspec; everything else the
   legacy rcp estimate (elided else-branch builds the same SET with a
   different unspec).  */
45774 /* x0 = rcp(b) estimate */
45775 if (mode == V16SFmode || mode == V8DFmode)
45776 emit_insn (gen_rtx_SET (VOIDmode, x0,
45777 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
45780 emit_insn (gen_rtx_SET (VOIDmode, x0,
45781 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
/* e0 = b * rcp(b) * rcp(b) -- the error term.  */
45785 emit_insn (gen_rtx_SET (VOIDmode, e0,
45786 gen_rtx_MULT (mode, x0, b)));
45789 emit_insn (gen_rtx_SET (VOIDmode, e0,
45790 gen_rtx_MULT (mode, x0, e0)));
/* e1 = 2 * rcp(b).  */
45793 emit_insn (gen_rtx_SET (VOIDmode, e1,
45794 gen_rtx_PLUS (mode, x0, x0)));
/* x1 = refined reciprocal = e1 - e0.  */
45797 emit_insn (gen_rtx_SET (VOIDmode, x1,
45798 gen_rtx_MINUS (mode, e1, e0)));
/* res = a * (1/b).  */
45801 emit_insn (gen_rtx_SET (VOIDmode, res,
45802 gen_rtx_MULT (mode, a, x1)));
45805 /* Output code to perform a Newton-Raphson approximation of a
45806 single precision floating point [reciprocal] square root. */
/* Expand sqrt or rsqrt of A into RES using a Newton-Raphson step on
   the rsqrt estimate (the elided parameter presumably selects
   reciprocal vs. plain sqrt -- see the formulas at 45837/45838).  */
45808 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode,
45811 rtx x0, e0, e1, e2, e3, mthree, mhalf;
45815 x0 = gen_reg_rtx (mode);
45816 e0 = gen_reg_rtx (mode);
45817 e1 = gen_reg_rtx (mode);
45818 e2 = gen_reg_rtx (mode);
45819 e3 = gen_reg_rtx (mode);
/* Build the constants -3.0 and -0.5 used by the refinement step.  */
45821 real_from_integer (&r, VOIDmode, -3, SIGNED);
45822 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45824 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
45825 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
45826 unspec = UNSPEC_RSQRT;
45828 if (VECTOR_MODE_P (mode))
45830 mthree = ix86_build_const_vector (mode, true, mthree);
45831 mhalf = ix86_build_const_vector (mode, true, mhalf);
45832 /* There is no 512-bit rsqrt. There is however rsqrt14. */
45833 if (GET_MODE_SIZE (mode) == 64)
45834 unspec = UNSPEC_RSQRT14;
45837 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
45838 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
45840 a = force_reg (mode, a);
45842 /* x0 = rsqrt(a) estimate */
45843 emit_insn (gen_rtx_SET (VOIDmode, x0,
45844 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
/* rsqrt(0) is +inf; for the sqrt path mask the estimate to zero where
   a == 0 so the later multiply does not produce inf * 0 = NaN.  */
45847 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
45852 zero = gen_reg_rtx (mode);
45853 mask = gen_reg_rtx (mode);
45855 zero = force_reg (mode, CONST0_RTX(mode));
45857 /* Handle masked compare. */
45858 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
45860 mask = gen_reg_rtx (HImode);
45861 /* Imm value 0x4 corresponds to not-equal comparison. */
45862 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
45863 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
/* Non-AVX512: build an all-ones/all-zeros NE mask and AND it in.  */
45867 emit_insn (gen_rtx_SET (VOIDmode, mask,
45868 gen_rtx_NE (mode, zero, a)));
45870 emit_insn (gen_rtx_SET (VOIDmode, x0,
45871 gen_rtx_AND (mode, x0, mask)));
/* e0 = x0 * a, e1 = e0 * x0, e2 = e1 - 3.0.  */
45876 emit_insn (gen_rtx_SET (VOIDmode, e0,
45877 gen_rtx_MULT (mode, x0, a)));
45879 emit_insn (gen_rtx_SET (VOIDmode, e1,
45880 gen_rtx_MULT (mode, e0, x0)));
45883 mthree = force_reg (mode, mthree);
45884 emit_insn (gen_rtx_SET (VOIDmode, e2,
45885 gen_rtx_PLUS (mode, e1, mthree)));
45887 mhalf = force_reg (mode, mhalf);
/* rsqrt path: e3 = -0.5 * x0; sqrt path: e3 = -0.5 * e0.  */
45889 /* e3 = -.5 * x0 */
45890 emit_insn (gen_rtx_SET (VOIDmode, e3,
45891 gen_rtx_MULT (mode, x0, mhalf)));
45893 /* e3 = -.5 * e0 */
45894 emit_insn (gen_rtx_SET (VOIDmode, e3,
45895 gen_rtx_MULT (mode, e0, mhalf)));
45896 /* ret = e2 * e3 */
45897 emit_insn (gen_rtx_SET (VOIDmode, res,
45898 gen_rtx_MULT (mode, e2, e3)));
45901 #ifdef TARGET_SOLARIS
45902 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
45905 i386_solaris_elf_named_section (const char *name, unsigned int flags,
45908 /* With Binutils 2.15, the "@unwind" marker must be specified on
45909 every occurrence of the ".eh_frame" section, not just the first
/* Emit .eh_frame with the @unwind section type; "aw" vs "a" depends on
   whether the section is writable.  */
45912 && strcmp (name, ".eh_frame") == 0)
45914 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
45915 flags & SECTION_WRITE ? "aw" : "a");
/* COMDAT sections get the Solaris-specific comdat directive.  */
45920 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
45922 solaris_elf_asm_comdat_section (name, flags, decl);
/* Everything else falls back to the generic ELF implementation.  */
45927 default_elf_asm_named_section (name, flags, decl);
45929 #endif /* TARGET_SOLARIS */
45931 /* Return the mangling of TYPE if it is an extended fundamental type. */
/* Implement TARGET_MANGLE_TYPE: return the C++ name-mangling string
   for x86 extended fundamental FP types, or (presumably) NULL for
   types mangled generically -- the return statements are in the
   elided switch cases.  */
45933 static const char *
45934 ix86_mangle_type (const_tree type)
45936 type = TYPE_MAIN_VARIANT (type);
/* Only void/bool/integer/real types can be fundamental.  */
45938 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
45939 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
45942 switch (TYPE_MODE (type))
45945 /* __float128 is "g". */
45948 /* "long double" or __float80 is "e". */
45955 /* For 32-bit code we can save PIC register setup by using
45956 __stack_chk_fail_local hidden function instead of calling
45957 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
45958 register, so it is better to call __stack_chk_fail directly. */
/* Implement TARGET_STACK_PROTECT_FAIL: 64-bit calls __stack_chk_fail
   directly (no PIC setup needed); 32-bit uses the hidden
   __stack_chk_fail_local to avoid PIC register setup.  */
45960 static tree ATTRIBUTE_UNUSED
45961 ix86_stack_protect_fail (void)
45963 return TARGET_64BIT
45964 ? default_external_stack_protect_fail ()
45965 : default_hidden_stack_protect_fail ();
45968 /* Select a format to encode pointers in exception handling data. CODE
45969 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
45970 true if the symbol may be affected by dynamic relocations.
45972 ??? All x86 object file formats are capable of representing this.
45973 After all, the relocation needed is the same as for the call insn.
45974 Whether or not a particular assembler allows us to enter such, I
45975 guess we'll have to see. */
/* Choose the DWARF EH pointer encoding.  CODE: 0 data, 1 code labels,
   2 function pointers; GLOBAL: symbol may need dynamic relocation.  */
45977 asm_preferred_eh_data_format (int code, int global)
/* PIC branch (the enclosing `if' condition is elided): default to
   8-byte pc-relative, narrowing to 4-byte for small/medium models.  */
45981 int type = DW_EH_PE_sdata8;
45983 || ix86_cmodel == CM_SMALL_PIC
45984 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
45985 type = DW_EH_PE_sdata4;
45986 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
/* Non-PIC: absolute encodings, 4-byte when the code model allows.  */
45988 if (ix86_cmodel == CM_SMALL
45989 || (ix86_cmodel == CM_MEDIUM && code))
45990 return DW_EH_PE_udata4;
45991 return DW_EH_PE_absptr;
45994 /* Expand copysign from SIGN to the positive value ABS_VALUE
45995 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
/* RESULT = copysign (ABS_VALUE, SIGN) for SSE modes: ABS_VALUE must
   already be non-negative.  MASK, if non-null, is a sign-bit mask;
   otherwise one is built here.  */
45998 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
46000 machine_mode mode = GET_MODE (sign);
46001 rtx sgn = gen_reg_rtx (mode);
46002 if (mask == NULL_RTX)
46004 machine_mode vmode;
46006 if (mode == SFmode)
46008 else if (mode == DFmode)
/* Build a vector sign-bit mask; the chosen VMODE assignments are in
   the elided branches above.  */
46013 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
46014 if (!VECTOR_MODE_P (mode))
46016 /* We need to generate a scalar mode mask in this case. */
46017 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
46018 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
46019 mask = gen_reg_rtx (mode);
46020 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* A caller-supplied MASK covers the non-sign bits, so invert it to
   isolate the sign bit of SIGN.  */
46024 mask = gen_rtx_NOT (mode, mask);
46025 emit_insn (gen_rtx_SET (VOIDmode, sgn,
46026 gen_rtx_AND (mode, mask, sign)));
/* result = abs_value | extracted sign bit.  */
46027 emit_insn (gen_rtx_SET (VOIDmode, result,
46028 gen_rtx_IOR (mode, abs_value, sgn)));
46031 /* Expand fabs (OP0) and return a new rtx that holds the result. The
46032 mask for masking out the sign-bit is stored in *SMASK, if that is
/* Expand fabs (OP0) via an AND with the inverted sign-bit mask and
   return the result; the mask is stored through *SMASK (presumably
   when SMASK is non-null -- that check is in elided lines).  */
46035 ix86_expand_sse_fabs (rtx op0, rtx *smask)
46037 machine_mode vmode, mode = GET_MODE (op0);
46040 xa = gen_reg_rtx (mode);
46041 if (mode == SFmode)
46043 else if (mode == DFmode)
/* VMODE is set per scalar mode in the elided branches; build an
   inverted (all-bits-but-sign) mask.  */
46047 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
46048 if (!VECTOR_MODE_P (mode))
46050 /* We need to generate a scalar mode mask in this case. */
46051 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
46052 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
46053 mask = gen_reg_rtx (mode);
46054 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* xa = op0 with the sign bit cleared.  */
46056 emit_insn (gen_rtx_SET (VOIDmode, xa,
46057 gen_rtx_AND (mode, op0, mask)));
46065 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
46066 swapping the operands if SWAP_OPERANDS is true. The expanded
46067 code is a forward jump to a newly created label in case the
46068 comparison is true. The generated label rtx is returned. */
46069 static rtx_code_label *
46070 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
46071 bool swap_operands)
46073 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
46074 rtx_code_label *label;
/* Optionally swap the compare operands first.  */
46078 std::swap (op0, op1);
46080 label = gen_label_rtx ();
/* Emit the FP compare into the flags register ...  */
46081 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
46082 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46083 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
/* ... then a forward conditional jump to LABEL when CODE holds.  */
46084 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
46085 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
46086 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
46087 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
46088 JUMP_LABEL (tmp) = label;
46093 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
46094 using comparison code CODE. Operands are swapped for the comparison if
46095 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
/* Emit a mask-generating SSE compare (cmpss/cmpsd-style) of OP0 with
   OP1 under CODE; returns the all-ones/all-zeros mask register.  */
46097 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
46098 bool swap_operands)
46100 rtx (*insn)(rtx, rtx, rtx, rtx);
46101 machine_mode mode = GET_MODE (op0);
46102 rtx mask = gen_reg_rtx (mode);
/* Optionally swap the compare operands first.  */
46105 std::swap (op0, op1);
/* Pick the scalar setcc expander by mode (SF or DF only).  */
46107 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
46109 emit_insn (insn (mask, op0, op1,
46110 gen_rtx_fmt_ee (code, mode, op0, op1)));
46114 /* Generate and return a rtx of mode MODE for 2**n where n is the number
46115 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
/* Return a register holding 2**p where p is the mantissa width of
   MODE: 2**52 for DFmode, 2**23 for SFmode.  Adding and subtracting
   this constant rounds a finite value to integer.  */
46117 ix86_gen_TWO52 (machine_mode mode)
46119 REAL_VALUE_TYPE TWO52r;
46122 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
46123 TWO52 = const_double_from_real_value (TWO52r, mode);
46124 TWO52 = force_reg (mode, TWO52);
46129 /* Expand SSE sequence for computing lround from OP1 storing
/* Expand lround (OP1) into integer OP0 using SSE.  */
46132 ix86_expand_lround (rtx op0, rtx op1)
46134 /* C code for the stuff we're doing below:
46135 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
46138 machine_mode mode = GET_MODE (op1);
46139 const struct real_format *fmt;
46140 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
/* Use the value just below 0.5 so that e.g. 0.49999... does not get
   rounded up to 1 after the addition truncates.  */
46143 /* load nextafter (0.5, 0.0) */
46144 fmt = REAL_MODE_FORMAT (mode);
46145 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46146 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46148 /* adj = copysign (0.5, op1) */
46149 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
46150 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
46152 /* adj = op1 + adj */
46153 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
/* Truncating conversion now yields round-to-nearest-away behavior.  */
46155 /* op0 = (imode)adj */
46156 expand_fix (op0, adj, 0);
46159 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
/* Expand lfloor (DO_FLOOR true) or lceil (DO_FLOOR false) of OP1 into
   integer OP0: convert to integer, convert back, and compensate by
   one when the round-trip over/undershot.  */
46162 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
46164 /* C code for the stuff we're doing below (for do_floor):
46166 xi -= (double)xi > op1 ? 1 : 0;
46169 machine_mode fmode = GET_MODE (op1);
46170 machine_mode imode = GET_MODE (op0);
46171 rtx ireg, freg, tmp;
46172 rtx_code_label *label;
46174 /* reg = (long)op1 */
46175 ireg = gen_reg_rtx (imode);
46176 expand_fix (ireg, op1, 0);
46178 /* freg = (double)reg */
46179 freg = gen_reg_rtx (fmode);
46180 expand_float (freg, ireg, 0);
/* Skip the adjustment when the truncation already matched; operands
   are swapped for the ceil case.  */
46182 /* ireg = (freg > op1) ? ireg - 1 : ireg */
46183 label = ix86_expand_sse_compare_and_jump (UNLE,
46184 freg, op1, !do_floor);
46185 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
46186 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
46187 emit_move_insn (ireg, tmp);
46189 emit_label (label);
46190 LABEL_NUSES (label) = 1;
46192 emit_move_insn (op0, ireg);
46195 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
46196 result in OPERAND0. */
/* Expand rint (OPERAND1) into OPERAND0 using the TWO52 add/sub trick;
   values with |x| >= 2**52 are already integral and skipped.  */
46198 ix86_expand_rint (rtx operand0, rtx operand1)
46200 /* C code for the stuff we're doing below:
46201 xa = fabs (operand1);
46202 if (!isless (xa, 2**52))
46204 xa = xa + 2**52 - 2**52;
46205 return copysign (xa, operand1);
46207 machine_mode mode = GET_MODE (operand0);
46208 rtx res, xa, TWO52, mask;
46209 rtx_code_label *label;
46211 res = gen_reg_rtx (mode);
46212 emit_move_insn (res, operand1);
46214 /* xa = abs (operand1) */
46215 xa = ix86_expand_sse_fabs (res, &mask);
46217 /* if (!isless (xa, TWO52)) goto label; */
46218 TWO52 = ix86_gen_TWO52 (mode);
46219 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* Adding then subtracting 2**52 rounds to integer in the current
   rounding mode.  */
46221 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46222 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
/* Restore the original sign (keeps -0.0 correct).  */
46224 ix86_sse_copysign_to_positive (res, xa, res, mask);
46226 emit_label (label);
46227 LABEL_NUSES (label) = 1;
46229 emit_move_insn (operand0, res);
46232 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* Expand floor/ceil of OPERAND1 into OPERAND0 without relying on
   64-bit cvttsd2siq (variant usable on 32-bit targets): round via the
   TWO52 trick, then compensate by +/-1.0 via a compare mask.  */
46235 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
46237 /* C code for the stuff we expand below.
46238 double xa = fabs (x), x2;
46239 if (!isless (xa, TWO52))
46241 xa = xa + TWO52 - TWO52;
46242 x2 = copysign (xa, x);
46251 machine_mode mode = GET_MODE (operand0);
46252 rtx xa, TWO52, tmp, one, res, mask;
46253 rtx_code_label *label;
46255 TWO52 = ix86_gen_TWO52 (mode);
46257 /* Temporary for holding the result, initialized to the input
46258 operand to ease control flow. */
46259 res = gen_reg_rtx (mode);
46260 emit_move_insn (res, operand1);
46262 /* xa = abs (operand1) */
46263 xa = ix86_expand_sse_fabs (res, &mask);
/* |x| >= 2**52 is already integral: skip all adjustment.  */
46265 /* if (!isless (xa, TWO52)) goto label; */
46266 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46268 /* xa = xa + TWO52 - TWO52; */
46269 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46270 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46272 /* xa = copysign (xa, operand1) */
46273 ix86_sse_copysign_to_positive (xa, xa, res, mask);
/* floor subtracts 1.0 when the rounded value overshot; ceil subtracts
   -1.0 (i.e. adds 1) in the mirrored case.  */
46275 /* generate 1.0 or -1.0 */
46276 one = force_reg (mode,
46277 const_double_from_real_value (do_floor
46278 ? dconst1 : dconstm1, mode));
46280 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46281 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46282 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46283 gen_rtx_AND (mode, one, tmp)));
46284 /* We always need to subtract here to preserve signed zero. */
46285 tmp = expand_simple_binop (mode, MINUS,
46286 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46287 emit_move_insn (res, tmp);
46289 emit_label (label);
46290 LABEL_NUSES (label) = 1;
46292 emit_move_insn (operand0, res);
46295 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* Expand floor/ceil of OPERAND1 into OPERAND0 via integer round-trip
   (needs cvttsd2siq on DFmode, so 64-bit-capable targets).  */
46298 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
46300 /* C code for the stuff we expand below.
46301 double xa = fabs (x), x2;
46302 if (!isless (xa, TWO52))
46304 x2 = (double)(long)x;
46311 if (HONOR_SIGNED_ZEROS (mode))
46312 return copysign (x2, x);
46315 machine_mode mode = GET_MODE (operand0);
46316 rtx xa, xi, TWO52, tmp, one, res, mask;
46317 rtx_code_label *label;
46319 TWO52 = ix86_gen_TWO52 (mode);
46321 /* Temporary for holding the result, initialized to the input
46322 operand to ease control flow. */
46323 res = gen_reg_rtx (mode);
46324 emit_move_insn (res, operand1);
46326 /* xa = abs (operand1) */
46327 xa = ix86_expand_sse_fabs (res, &mask);
/* |x| >= 2**52 is already integral: skip all adjustment.  */
46329 /* if (!isless (xa, TWO52)) goto label; */
46330 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* Truncate to integer and back; this rounds toward zero.  */
46332 /* xa = (double)(long)x */
46333 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46334 expand_fix (xi, res, 0);
46335 expand_float (xa, xi, 0);
46338 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
/* Compensate the truncation: floor subtracts 1, ceil adds 1, when the
   round-trip moved the value past the input.  */
46340 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46341 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46342 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46343 gen_rtx_AND (mode, one, tmp)));
46344 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
46345 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46346 emit_move_insn (res, tmp);
/* Restore the sign so -0.0 inputs stay -0.0 when that matters.  */
46348 if (HONOR_SIGNED_ZEROS (mode))
46349 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46351 emit_label (label);
46352 LABEL_NUSES (label) = 1;
46354 emit_move_insn (operand0, res);
46357 /* Expand SSE sequence for computing round from OPERAND1 storing
46358 into OPERAND0. Sequence that works without relying on DImode truncation
46359 via cvttsd2siq that is only available on 64bit targets. */
/* Expand round (OPERAND1) into OPERAND0 without DImode truncation
   (works on 32-bit targets): TWO52-round |x|, then nudge by 1 when
   the rounding error exceeds +/-0.5, and restore the sign.  */
46361 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
46363 /* C code for the stuff we expand below.
46364 double xa = fabs (x), xa2, x2;
46365 if (!isless (xa, TWO52))
46367 Using the absolute value and copying back sign makes
46368 -0.0 -> -0.0 correct.
46369 xa2 = xa + TWO52 - TWO52;
46374 else if (dxa > 0.5)
46376 x2 = copysign (xa2, x);
46379 machine_mode mode = GET_MODE (operand0);
46380 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
46381 rtx_code_label *label;
46383 TWO52 = ix86_gen_TWO52 (mode);
46385 /* Temporary for holding the result, initialized to the input
46386 operand to ease control flow. */
46387 res = gen_reg_rtx (mode);
46388 emit_move_insn (res, operand1);
46390 /* xa = abs (operand1) */
46391 xa = ix86_expand_sse_fabs (res, &mask);
/* |x| >= 2**52 is already integral: skip all adjustment.  */
46393 /* if (!isless (xa, TWO52)) goto label; */
46394 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46396 /* xa2 = xa + TWO52 - TWO52; */
46397 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46398 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
/* dxa = rounding error of the TWO52 trick.  */
46400 /* dxa = xa2 - xa; */
46401 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
46403 /* generate 0.5, 1.0 and -0.5 */
46404 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
46405 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
46406 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
46410 tmp = gen_reg_rtx (mode);
/* Rounded up by more than 0.5: take one back.  */
46411 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
46412 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
46413 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46414 gen_rtx_AND (mode, one, tmp)));
46415 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
/* Rounded down by 0.5 or more: add one.  */
46416 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
46417 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
46418 emit_insn (gen_rtx_SET (VOIDmode, tmp,
46419 gen_rtx_AND (mode, one, tmp)));
46420 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46422 /* res = copysign (xa2, operand1) */
46423 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
46425 emit_label (label);
46426 LABEL_NUSES (label) = 1;
46428 emit_move_insn (operand0, res);
46431 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* Expand trunc (OPERAND1) into OPERAND0 via integer round-trip
   (cvtts* truncates toward zero, which is exactly trunc).  */
46434 ix86_expand_trunc (rtx operand0, rtx operand1)
46436 /* C code for SSE variant we expand below.
46437 double xa = fabs (x), x2;
46438 if (!isless (xa, TWO52))
46440 x2 = (double)(long)x;
46441 if (HONOR_SIGNED_ZEROS (mode))
46442 return copysign (x2, x);
46445 machine_mode mode = GET_MODE (operand0);
46446 rtx xa, xi, TWO52, res, mask;
46447 rtx_code_label *label;
46449 TWO52 = ix86_gen_TWO52 (mode);
46451 /* Temporary for holding the result, initialized to the input
46452 operand to ease control flow. */
46453 res = gen_reg_rtx (mode);
46454 emit_move_insn (res, operand1);
46456 /* xa = abs (operand1) */
46457 xa = ix86_expand_sse_fabs (res, &mask);
/* |x| >= 2**52 is already integral: skip conversion.  */
46459 /* if (!isless (xa, TWO52)) goto label; */
46460 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46462 /* x = (double)(long)x */
46463 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46464 expand_fix (xi, res, 0);
46465 expand_float (res, xi, 0);
/* Restore the sign so -0.0 inputs stay -0.0 when that matters.  */
46467 if (HONOR_SIGNED_ZEROS (mode))
46468 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46470 emit_label (label);
46471 LABEL_NUSES (label) = 1;
46473 emit_move_insn (operand0, res);
46476 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* Expand trunc (OPERAND1) into OPERAND0 without DImode truncation
   (32-bit-safe variant): TWO52-round |x|, subtract 1 if that rounded
   up, then restore the sign.  */
46479 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
46481 machine_mode mode = GET_MODE (operand0);
46482 rtx xa, mask, TWO52, one, res, smask, tmp;
46483 rtx_code_label *label;
46485 /* C code for SSE variant we expand below.
46486 double xa = fabs (x), x2;
46487 if (!isless (xa, TWO52))
46489 xa2 = xa + TWO52 - TWO52;
46493 x2 = copysign (xa2, x);
46497 TWO52 = ix86_gen_TWO52 (mode);
46499 /* Temporary for holding the result, initialized to the input
46500 operand to ease control flow. */
46501 res = gen_reg_rtx (mode);
46502 emit_move_insn (res, operand1);
46504 /* xa = abs (operand1) */
46505 xa = ix86_expand_sse_fabs (res, &smask);
/* |x| >= 2**52 is already integral: skip all adjustment.  */
46507 /* if (!isless (xa, TWO52)) goto label; */
46508 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46510 /* res = xa + TWO52 - TWO52; */
46511 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46512 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
46513 emit_move_insn (res, tmp);
46516 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
/* If the TWO52 trick rounded up past |x|, take one back so the
   result truncates toward zero.  */
46518 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
46519 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
46520 emit_insn (gen_rtx_SET (VOIDmode, mask,
46521 gen_rtx_AND (mode, mask, one)));
46522 tmp = expand_simple_binop (mode, MINUS,
46523 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
46524 emit_move_insn (res, tmp);
46526 /* res = copysign (res, operand1) */
46527 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
46529 emit_label (label);
46530 LABEL_NUSES (label) = 1;
46532 emit_move_insn (operand0, res);
46535 /* Expand SSE sequence for computing round from OPERAND1 storing
/* Expand round (OPERAND1) into OPERAND0 via integer round-trip:
   add just-under-0.5 to |x|, truncate, and restore the sign.  */
46538 ix86_expand_round (rtx operand0, rtx operand1)
46540 /* C code for the stuff we're doing below:
46541 double xa = fabs (x);
46542 if (!isless (xa, TWO52))
46544 xa = (double)(long)(xa + nextafter (0.5, 0.0));
46545 return copysign (xa, x);
46547 machine_mode mode = GET_MODE (operand0);
46548 rtx res, TWO52, xa, xi, half, mask;
46549 rtx_code_label *label;
46550 const struct real_format *fmt;
46551 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46553 /* Temporary for holding the result, initialized to the input
46554 operand to ease control flow. */
46555 res = gen_reg_rtx (mode);
46556 emit_move_insn (res, operand1);
/* |x| >= 2**52 is already integral: skip all adjustment.  */
46558 TWO52 = ix86_gen_TWO52 (mode);
46559 xa = ix86_expand_sse_fabs (res, &mask);
46560 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* Use the value just below 0.5 so 0.49999... does not round up.  */
46562 /* load nextafter (0.5, 0.0) */
46563 fmt = REAL_MODE_FORMAT (mode);
46564 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46565 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46567 /* xa = xa + 0.5 */
46568 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
46569 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
46571 /* xa = (double)(int64_t)xa */
46572 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46573 expand_fix (xi, xa, 0);
46574 expand_float (xa, xi, 0);
46576 /* res = copysign (xa, operand1) */
46577 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
46579 emit_label (label);
46580 LABEL_NUSES (label) = 1;
46582 emit_move_insn (operand0, res);
46585 /* Expand SSE sequence for computing round
46586 from OP1 storing into OP0 using sse4 round insn. */
/* Expand round (OP1) into OP0 using the SSE4.1 ROUND instruction:
   round(a) = trunc (a + copysign (almost-0.5, a)).  */
46588 ix86_expand_round_sse4 (rtx op0, rtx op1)
46590 machine_mode mode = GET_MODE (op0);
46591 rtx e1, e2, res, half;
46592 const struct real_format *fmt;
46593 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46594 rtx (*gen_copysign) (rtx, rtx, rtx);
46595 rtx (*gen_round) (rtx, rtx, rtx);
/* Pick SF or DF expanders by mode (switch labels elided).  */
46600 gen_copysign = gen_copysignsf3;
46601 gen_round = gen_sse4_1_roundsf2;
46604 gen_copysign = gen_copysigndf3;
46605 gen_round = gen_sse4_1_rounddf2;
46608 gcc_unreachable ();
46611 /* round (a) = trunc (a + copysign (0.5, a)) */
/* Use the value just below 0.5 so 0.49999... does not round up.  */
46613 /* load nextafter (0.5, 0.0) */
46614 fmt = REAL_MODE_FORMAT (mode);
46615 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46616 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46617 half = const_double_from_real_value (pred_half, mode);
46619 /* e1 = copysign (0.5, op1) */
46620 e1 = gen_reg_rtx (mode);
46621 emit_insn (gen_copysign (e1, half, op1));
46623 /* e2 = op1 + e1 */
46624 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
46626 /* res = trunc (e2) */
46627 res = gen_reg_rtx (mode);
46628 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
46630 emit_move_insn (op0, res);
46634 /* Table of valid machine attributes. */
/* Machine attribute table (TARGET_ATTRIBUTE_TABLE).  Each entry's
   trailing affects_type_identity flag sits on an elided continuation
   line for several rows below.  */
46635 static const struct attribute_spec ix86_attribute_table[] =
46637 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
46638 affects_type_identity } */
46639 /* Stdcall attribute says callee is responsible for popping arguments
46640 if they are not variable. */
46641 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46643 /* Fastcall attribute says callee is responsible for popping arguments
46644 if they are not variable. */
46645 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46647 /* Thiscall attribute says callee is responsible for popping arguments
46648 if they are not variable. */
46649 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46651 /* Cdecl attribute says the callee is a normal C declaration */
46652 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46654 /* Regparm attribute specifies how many integer arguments are to be
46655 passed in registers. */
46656 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
46658 /* Sseregparm attribute says we are using x86_64 calling conventions
46659 for FP arguments. */
46660 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46662 /* The transactional memory builtins are implicitly regparm or fastcall
46663 depending on the ABI. Override the generic do-nothing attribute that
46664 these builtins were declared with. */
46665 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
46667 /* force_align_arg_pointer says this function realigns the stack at entry. */
46668 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
46669 false, true, true, ix86_handle_cconv_attribute, false },
46670 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46671 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
46672 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
46673 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
/* Struct-layout attributes: MS vs. GCC bitfield/packing rules.  */
46676 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46678 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46680 #ifdef SUBTARGET_ATTRIBUTE_TABLE
46681 SUBTARGET_ATTRIBUTE_TABLE,
46683 /* ms_abi and sysv_abi calling convention function attributes. */
46684 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46685 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46686 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
46688 { "callee_pop_aggregate_return", 1, 1, false, true, true,
46689 ix86_handle_callee_pop_aggregate_return, true },
/* End-of-table sentinel.  */
46691 { NULL, 0, 0, false, false, false, NULL, false }
46694 /* Implement targetm.vectorize.builtin_vectorization_cost. */
/* Implement targetm.vectorize.builtin_vectorization_cost: map a
   vectorizer cost kind to the current tuning's cost table entry
   (case labels for the first few arms are on elided lines).  */
46696 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
46701 switch (type_of_cost)
46704 return ix86_cost->scalar_stmt_cost;
46707 return ix86_cost->scalar_load_cost;
46710 return ix86_cost->scalar_store_cost;
46713 return ix86_cost->vec_stmt_cost;
46716 return ix86_cost->vec_align_load_cost;
46719 return ix86_cost->vec_store_cost;
46721 case vec_to_scalar:
46722 return ix86_cost->vec_to_scalar_cost;
46724 case scalar_to_vec:
46725 return ix86_cost->scalar_to_vec_cost;
/* Unaligned loads and stores share one cost entry.  */
46727 case unaligned_load:
46728 case unaligned_store:
46729 return ix86_cost->vec_unalign_load_cost;
46731 case cond_branch_taken:
46732 return ix86_cost->cond_taken_branch_cost;
46734 case cond_branch_not_taken:
46735 return ix86_cost->cond_not_taken_branch_cost;
46738 case vec_promote_demote:
46739 return ix86_cost->vec_stmt_cost;
/* Building a vector from scalars: roughly one insert per two
   elements, plus one.  */
46741 case vec_construct:
46742 elements = TYPE_VECTOR_SUBPARTS (vectype);
46743 return elements / 2 + 1;
46746 gcc_unreachable ();
46750 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
46751 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
46752 insn every time. */
/* Cached scratch insn reused by expand_vselect{,_vconcat} so a fresh
   insn need not be allocated on every query (GC-rooted via GTY).  */
46754 static GTY(()) rtx_insn *vselect_insn;
46756 /* Initialize vselect_insn. */
46759 init_vselect_insn (void)
/* Build a maximal PARALLEL of const0_rtx placeholders; callers later
   shrink it with PUT_NUM_ELEM and fill in the real selectors.  */
46764 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
46765 for (i = 0; i < MAX_VECT_LEN; ++i)
46766 XVECEXP (x, 0, i) = const0_rtx;
/* Placeholder (set (nil) (vec_select (vec_concat nil nil) par)); the
   modes and operands are overwritten before each recog attempt.  */
46767 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
46769 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
46771 vselect_insn = emit_insn (x);
46775 /* Construct (set target (vec_select op0 (parallel perm))) and
46776 return true if that's a valid instruction in the active ISA. */
/* PERM holds NELT selector indices.  If TESTING_P, only check whether
   the pattern recognizes; otherwise also emit the insn.  The cached
   vselect_insn is mutated in place and restored before returning.  */
46779 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
46780 unsigned nelt, bool testing_p)
46783 rtx x, save_vconcat;
46786 if (vselect_insn == NULL_RTX)
46787 init_vselect_insn ();
/* Fill the PARALLEL of the cached insn with the requested selector.  */
46789 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
46790 PUT_NUM_ELEM (XVEC (x, 0), nelt);
46791 for (i = 0; i < nelt; ++i)
46792 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
/* Temporarily splice OP0 and TARGET into the cached pattern.  */
46793 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46794 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
46795 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
46796 SET_DEST (PATTERN (vselect_insn)) = target;
46797 icode = recog_memoized (vselect_insn);
46799 if (icode >= 0 && !testing_p)
46800 emit_insn (copy_rtx (PATTERN (vselect_insn)));
/* Restore the shared skeleton and invalidate the memoized code so the
   next caller re-recognizes it.  */
46802 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
46803 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
46804 INSN_CODE (vselect_insn) = -1;
46809 /* Similar, but generate a vec_concat from op0 and op1 as well. */
/* The cached insn's VEC_CONCAT is retargeted to the double-width mode
   of OP0 and reused as the vec_select operand, then reset to
   const0_rtx placeholders afterwards.  */
46812 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
46813 const unsigned char *perm, unsigned nelt,
46816 machine_mode v2mode;
46820 if (vselect_insn == NULL_RTX)
46821 init_vselect_insn ();
/* E.g. V4SI -> V8SI: the concat of two NELT vectors.  */
46823 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
46824 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
46825 PUT_MODE (x, v2mode);
46828 ok = expand_vselect (target, x, perm, nelt, testing_p);
/* Detach OP0/OP1 from the shared cached pattern.  */
46829 XEXP (x, 0) = const0_rtx;
46830 XEXP (x, 1) = const0_rtx;
46834 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46835 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
/* Succeeds only when D is a true blend: every element comes from the
   same position of either operand (perm[i] == i or i + nelt).  An
   immediate mask is built where possible; otherwise a constant vector
   is materialized for the variable-blend (pblendvb) forms.  */
46838 expand_vec_perm_blend (struct expand_vec_perm_d *d)
46840 machine_mode vmode = d->vmode;
46841 unsigned i, mask, nelt = d->nelt;
46842 rtx target, op0, op1, x;
46843 rtx rperm[32], vperm;
/* A one-operand permutation can never be a blend of two operands.  */
46845 if (d->one_operand_p)
/* ISA gates: which blend forms exist for this vector size.  */
46847 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
46848 && GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4)
46850 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
46852 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
46854 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
46859 /* This is a blend, not a permute. Elements must stay in their
46860 respective lanes. */
46861 for (i = 0; i < nelt; ++i)
46863 unsigned e = d->perm[i];
46864 if (!(e == i || e == i + nelt))
46871 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
46872 decision should be extracted elsewhere, so that we only try that
46873 sequence once all budget==3 options have been tried. */
46874 target = d->target;
/* One mask bit per element: set when the element comes from op1.  */
46893 for (i = 0; i < nelt; ++i)
46894 mask |= (d->perm[i] >= nelt) << i;
/* V2DI via pblendw: each 64-bit element covers four 16-bit mask
   nibble bits.  */
46898 for (i = 0; i < 2; ++i)
46899 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
/* V4SI via vpblendd-style 2-bit groups per 32-bit element.  */
46904 for (i = 0; i < 4; ++i)
46905 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
46910 /* See if bytes move in pairs so we can use pblendw with
46911 an immediate argument, rather than pblendvb with a vector
46913 for (i = 0; i < 16; i += 2)
46914 if (d->perm[i] + 1 != d->perm[i + 1])
/* Variable byte blend: build a 0/-1 selector vector, one entry per
   byte, -1 selecting op1.  */
46917 for (i = 0; i < nelt; ++i)
46918 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
46921 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
46922 vperm = force_reg (vmode, vperm);
46924 if (GET_MODE_SIZE (vmode) == 16)
46925 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
46927 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
/* Copy back through a lowpart when the blend ran in a punned mode.  */
46928 if (target != d->target)
46929 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
46933 for (i = 0; i < 8; ++i)
46934 mask |= (d->perm[i * 2] >= 16) << i;
46939 target = gen_reg_rtx (vmode);
46940 op0 = gen_lowpart (vmode, op0);
46941 op1 = gen_lowpart (vmode, op1);
46945 /* See if bytes move in pairs. If not, vpblendvb must be used. */
46946 for (i = 0; i < 32; i += 2)
46947 if (d->perm[i] + 1 != d->perm[i + 1])
46949 /* See if bytes move in quadruplets. If yes, vpblendd
46950 with immediate can be used. */
46951 for (i = 0; i < 32; i += 4)
46952 if (d->perm[i] + 2 != d->perm[i + 2])
46956 /* See if bytes move the same in both lanes. If yes,
46957 vpblendw with immediate can be used. */
46958 for (i = 0; i < 16; i += 2)
46959 if (d->perm[i] + 16 != d->perm[i + 16])
46962 /* Use vpblendw. */
46963 for (i = 0; i < 16; ++i)
46964 mask |= (d->perm[i * 2] >= 32) << i;
46969 /* Use vpblendd. */
46970 for (i = 0; i < 8; ++i)
46971 mask |= (d->perm[i * 4] >= 32) << i;
46976 /* See if words move in pairs. If yes, vpblendd can be used. */
46977 for (i = 0; i < 16; i += 2)
46978 if (d->perm[i] + 1 != d->perm[i + 1])
46982 /* See if words move the same in both lanes. If not,
46983 vpblendvb must be used. */
46984 for (i = 0; i < 8; i++)
46985 if (d->perm[i] + 8 != d->perm[i + 8])
46987 /* Use vpblendvb. */
/* Each 16-bit word expands to two identical byte selectors.  */
46988 for (i = 0; i < 32; ++i)
46989 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
46993 target = gen_reg_rtx (vmode);
46994 op0 = gen_lowpart (vmode, op0);
46995 op1 = gen_lowpart (vmode, op1);
46996 goto finish_pblendvb;
46999 /* Use vpblendw. */
47000 for (i = 0; i < 16; ++i)
47001 mask |= (d->perm[i] >= 16) << i;
47005 /* Use vpblendd. */
47006 for (i = 0; i < 8; ++i)
47007 mask |= (d->perm[i * 2] >= 16) << i;
47012 /* Use vpblendd. */
47013 for (i = 0; i < 4; ++i)
47014 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
47019 gcc_unreachable ();
47022 /* This matches five different patterns with the different modes. */
/* Emit the immediate-mask blend as a generic VEC_MERGE set.  */
47023 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
47024 x = gen_rtx_SET (VOIDmode, target, x);
47026 if (target != d->target)
47027 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47032 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47033 in terms of the variable form of vpermilps.
47035 Note that we will have already failed the immediate input vpermilps,
47036 which requires that the high and low part shuffle be identical; the
47037 variable form doesn't require that. */
/* Applies only to one-operand V8SFmode permutations on AVX, and only
   when no element crosses a 128-bit lane boundary.  */
47040 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
47042 rtx rperm[8], vperm;
47045 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
47048 /* We can only permute within the 128-bit lane. */
47049 for (i = 0; i < 8; ++i)
47051 unsigned e = d->perm[i];
47052 if (i < 4 ? e >= 4 : e < 4)
/* Build the V8SI control vector for vpermilvar.  */
47059 for (i = 0; i < 8; ++i)
47061 unsigned e = d->perm[i];
47063 /* Within each 128-bit lane, the elements of op0 are numbered
47064 from 0 and the elements of op1 are numbered from 4. */
47070 rperm[i] = GEN_INT (e);
47073 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
47074 vperm = force_reg (V8SImode, vperm);
47075 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
47080 /* Return true if permutation D can be performed as VMODE permutation
/* True when D's permutation, defined on d->vmode elements, moves whole
   VMODE-sized chunks: VMODE must be an integer vector of the same total
   size with fewer (wider) elements, every chunk must start aligned, and
   the elements inside each chunk must be consecutive.  */
47084 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
47086 unsigned int i, j, chunk;
47088 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
47089 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
47090 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
/* VMODE must be strictly coarser than d->vmode.  */
47093 if (GET_MODE_NUNITS (vmode) >= d->nelt)
47096 chunk = d->nelt / GET_MODE_NUNITS (vmode)
47097 for (i = 0; i < d->nelt; i += chunk)
/* Chunk start must be chunk-aligned in the source.  */
47098 if (d->perm[i] & (chunk - 1))
/* And the rest of the chunk must follow consecutively.  */
47101 for (j = 1; j < chunk; ++j)
47102 if (d->perm[i] + j != d->perm[i + j])
47108 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47109 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
/* Tries, in order of vector size, each byte/word/dword variable-shuffle
   instruction that can realize D, building the control vector RPERM and
   dispatching on the chosen working mode VMODE at the end.  */
47112 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
47114 unsigned i, nelt, eltsz, mask;
47115 unsigned char perm[64];
47116 machine_mode vmode = V16QImode;
47117 rtx rperm[64], vperm, target, op0, op1;
/* Two-operand 16-byte permutations need XOP's vpperm.  */
47121 if (!d->one_operand_p)
47123 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
47126 && valid_perm_using_mode_p (V2TImode, d))
47131 /* Use vperm2i128 insn. The pattern uses
47132 V4DImode instead of V2TImode. */
47133 target = d->target;
47134 if (d->vmode != V4DImode)
47135 target = gen_reg_rtx (V4DImode);
47136 op0 = gen_lowpart (V4DImode, d->op0);
47137 op1 = gen_lowpart (V4DImode, d->op1);
/* Encode which 128-bit half of the concatenated operands goes to each
   destination half.  */
47139 = GEN_INT ((d->perm[0] / (nelt / 2))
47140 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
47141 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
47142 if (target != d->target)
47143 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47151 if (GET_MODE_SIZE (d->vmode) == 16)
47156 else if (GET_MODE_SIZE (d->vmode) == 32)
47161 /* V4DImode should be already handled through
47162 expand_vselect by vpermq instruction. */
47163 gcc_assert (d->vmode != V4DImode)
47166 if (d->vmode == V8SImode
47167 || d->vmode == V16HImode
47168 || d->vmode == V32QImode)
47170 /* First see if vpermq can be used for
47171 V8SImode/V16HImode/V32QImode. */
47172 if (valid_perm_using_mode_p (V4DImode, d))
/* Scale the permutation down to 64-bit chunk indices.  */
47174 for (i = 0; i < 4; i++)
47175 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
47178 target = gen_reg_rtx (V4DImode);
47179 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
47182 emit_move_insn (d->target,
47183 gen_lowpart (d->vmode, target));
47189 /* Next see if vpermd can be used. */
47190 if (valid_perm_using_mode_p (V8SImode, d))
47193 /* Or if vpermps can be used. */
47194 else if (d->vmode == V8SFmode)
47197 if (vmode == V32QImode)
47199 /* vpshufb only works intra lanes, it is not
47200 possible to shuffle bytes in between the lanes. */
47201 for (i = 0; i < nelt; ++i)
47202 if ((d->perm[i] ^ i) & (nelt / 2))
47206 else if (GET_MODE_SIZE (d->vmode) == 64)
47208 if (!TARGET_AVX512BW)
47211 /* If vpermq didn't work, vpshufb won't work either. */
47212 if (d->vmode == V8DFmode || d->vmode == V8DImode)
47216 if (d->vmode == V16SImode
47217 || d->vmode == V32HImode
47218 || d->vmode == V64QImode)
47220 /* First see if vpermq can be used for
47221 V16SImode/V32HImode/V64QImode. */
47222 if (valid_perm_using_mode_p (V8DImode, d))
47224 for (i = 0; i < 8; i++)
47225 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
47228 target = gen_reg_rtx (V8DImode);
47229 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
47232 emit_move_insn (d->target,
47233 gen_lowpart (d->vmode, target));
47239 /* Next see if vpermd can be used. */
47240 if (valid_perm_using_mode_p (V16SImode, d))
47243 /* Or if vpermps can be used. */
47244 else if (d->vmode == V16SFmode)
47246 if (vmode == V64QImode)
47248 /* vpshufb only works intra lanes, it is not
47249 possible to shuffle bytes in between the lanes. */
/* For 64-byte vectors the intra-lane granule is nelt/4.  */
47250 for (i = 0; i < nelt; ++i)
47251 if ((d->perm[i] ^ i) & (nelt / 4))
/* Build the dword control vector for vpermd/vpermps.  */
47262 if (vmode == V8SImode)
47263 for (i = 0; i < 8; ++i)
47264 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
47265 else if (vmode == V16SImode)
47266 for (i = 0; i < 16; ++i)
47267 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
/* Byte-granular control vector for pshufb/vpperm: expand each element
   index to ELTSZ consecutive byte indices, masked to the addressable
   range for the chosen instruction.  */
47270 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
47271 if (!d->one_operand_p)
47272 mask = 2 * nelt - 1;
47273 else if (vmode == V16QImode)
47275 else if (vmode == V64QImode)
47276 mask = nelt / 4 - 1;
47278 mask = nelt / 2 - 1;
47280 for (i = 0; i < nelt; ++i)
47282 unsigned j, e = d->perm[i] & mask;
47283 for (j = 0; j < eltsz; ++j)
47284 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
47288 vperm = gen_rtx_CONST_VECTOR (vmode,
47289 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
47290 vperm = force_reg (vmode, vperm);
47292 target = d->target;
47293 if (d->vmode != vmode)
47294 target = gen_reg_rtx (vmode);
47295 op0 = gen_lowpart (vmode, d->op0);
/* Dispatch on the working mode to the matching named pattern.  */
47296 if (d->one_operand_p)
47298 if (vmode == V16QImode)
47299 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
47300 else if (vmode == V32QImode)
47301 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
47302 else if (vmode == V64QImode)
47303 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
47304 else if (vmode == V8SFmode)
47305 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
47306 else if (vmode == V8SImode)
47307 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
47308 else if (vmode == V16SFmode)
47309 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
47310 else if (vmode == V16SImode)
47311 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
47313 gcc_unreachable ();
/* Two-operand case: XOP vpperm selects from both inputs.  */
47317 op1 = gen_lowpart (vmode, d->op1);
47318 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
47320 if (target != d->target)
47321 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47326 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
47327 in a single instruction. */
/* Tries, in order: identity move, broadcast, plain vec_select,
   interleave- and shufps-shaped SEL+CONCAT forms, the general
   two-operand concat form (both operand orders), then the
   blend/vpermil/pshufb/palignr/vpermi2 helper routines.  */
47330 expand_vec_perm_1 (struct expand_vec_perm_d *d)
47332 unsigned i, nelt = d->nelt;
47333 unsigned char perm2[MAX_VECT_LEN];
47335 /* Check plain VEC_SELECT first, because AVX has instructions that could
47336 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
47337 input where SEL+CONCAT may not. */
47338 if (d->one_operand_p)
47340 int mask = nelt - 1;
47341 bool identity_perm = true;
47342 bool broadcast_perm = true;
/* Classify the permutation while reducing indices mod nelt.  */
47344 for (i = 0; i < nelt; i++)
47346 perm2[i] = d->perm[i] & mask;
47348 identity_perm = false;
47350 broadcast_perm = false;
/* Identity: a plain register move suffices.  */
47356 emit_move_insn (d->target, d->op0);
47359 else if (broadcast_perm && TARGET_AVX2)
47361 /* Use vpbroadcast{b,w,d}. */
47362 rtx (*gen) (rtx, rtx) = NULL;
47366 if (TARGET_AVX512BW)
47367 gen = gen_avx512bw_vec_dupv64qi_1;
47370 gen = gen_avx2_pbroadcastv32qi_1;
47373 if (TARGET_AVX512BW)
47374 gen = gen_avx512bw_vec_dupv32hi_1;
47377 gen = gen_avx2_pbroadcastv16hi_1;
47380 if (TARGET_AVX512F)
47381 gen = gen_avx512f_vec_dupv16si_1;
47384 gen = gen_avx2_pbroadcastv8si_1;
47387 gen = gen_avx2_pbroadcastv16qi;
47390 gen = gen_avx2_pbroadcastv8hi;
47393 if (TARGET_AVX512F)
47394 gen = gen_avx512f_vec_dupv16sf_1;
47397 gen = gen_avx2_vec_dupv8sf_1;
47400 if (TARGET_AVX512F)
47401 gen = gen_avx512f_vec_dupv8df_1;
47404 if (TARGET_AVX512F)
47405 gen = gen_avx512f_vec_dupv8di_1;
47407 /* For other modes prefer other shuffles this function creates. */
47413 emit_insn (gen (d->target, d->op0));
47418 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
47421 /* There are plenty of patterns in sse.md that are written for
47422 SEL+CONCAT and are not replicated for a single op. Perhaps
47423 that should be changed, to avoid the nastiness here. */
47425 /* Recognize interleave style patterns, which means incrementing
47426 every other permutation operand. */
47427 for (i = 0; i < nelt; i += 2)
47429 perm2[i] = d->perm[i] & mask;
47430 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
47432 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47436 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
47439 for (i = 0; i < nelt; i += 4)
47441 perm2[i + 0] = d->perm[i + 0] & mask;
47442 perm2[i + 1] = d->perm[i + 1] & mask;
47443 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
47444 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
47447 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47453 /* Finally, try the fully general two operand permute. */
47454 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
47458 /* Recognize interleave style patterns with reversed operands. */
47459 if (!d->one_operand_p)
47461 for (i = 0; i < nelt; ++i)
47463 unsigned e = d->perm[i];
47471 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
47476 /* Try the SSE4.1 blend variable merge instructions. */
47477 if (expand_vec_perm_blend (d))
47480 /* Try one of the AVX vpermil variable permutations. */
47481 if (expand_vec_perm_vpermil (d))
47484 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
47485 vpshufb, vpermd, vpermps or vpermq variable permutation. */
47486 if (expand_vec_perm_pshufb (d))
47489 /* Try the AVX2 vpalignr instruction. */
47490 if (expand_vec_perm_palignr (d, true))
47493 /* Try the AVX512F vpermi2 instructions. */
47494 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
47500 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47501 in terms of a pair of pshuflw + pshufhw instructions. */
/* Only V8HImode, one operand, and only when the low four elements
   come from the low half and the high four from the high half.  */
47504 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
47506 unsigned char perm2[MAX_VECT_LEN];
47510 if (d->vmode != V8HImode || !d->one_operand_p)
47513 /* The two permutations only operate in 64-bit lanes. */
47514 for (i = 0; i < 4; ++i)
47515 if (d->perm[i] >= 4)
47517 for (i = 4; i < 8; ++i)
47518 if (d->perm[i] < 4)
47524 /* Emit the pshuflw. */
/* Low half takes the requested order; high half stays in place.  */
47525 memcpy (perm2, d->perm, 4);
47526 for (i = 4; i < 8; ++i)
47528 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
47531 /* Emit the pshufhw. */
/* High half takes the requested order; low half (already permuted by
   the pshuflw above) stays in place.  */
47532 memcpy (perm2 + 4, d->perm + 4, 4);
47533 for (i = 0; i < 4; ++i)
47535 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
47541 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47542 the permutation using the SSSE3 palignr instruction. This succeeds
47543 when all of the elements in PERM fit within one vector and we merely
47544 need to shift them down so that a single vector permutation has a
47545 chance to succeed. If SINGLE_INSN_ONLY_P, succeed if only
47546 the vpalignr instruction itself can perform the requested permutation. */
47549 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
47551 unsigned i, nelt = d->nelt;
47552 unsigned min, max, minswap, maxswap;
47553 bool in_order, ok, swap = false;
47555 struct expand_vec_perm_d dcopy;
47557 /* Even with AVX, palignr only operates on 128-bit vectors,
47558 in AVX2 palignr operates on both 128-bit lanes. */
47559 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47560 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
/* Track the index range both as given (min/max) and with the two
   operands swapped (minswap/maxswap), picking whichever fits.  */
47565 minswap = 2 * nelt;
47567 for (i = 0; i < nelt; ++i)
47569 unsigned e = d->perm[i];
47570 unsigned eswap = d->perm[i] ^ nelt;
47571 if (GET_MODE_SIZE (d->vmode) == 32)
/* For 32-byte vectors fold to per-lane indices.  */
47573 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
47574 eswap = e ^ (nelt / 2);
47580 if (eswap < minswap)
47582 if (eswap > maxswap)
47586 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
47588 if (d->one_operand_p
47590 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
47591 ? nelt / 2 : nelt))
47598 /* Given that we have SSSE3, we know we'll be able to implement the
47599 single operand permutation after the palignr with pshufb for
47600 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
47602 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
/* Swapped fit: exchange the operands and flip indices across nelt.  */
47608 dcopy.op0 = d->op1;
47609 dcopy.op1 = d->op0;
47610 for (i = 0; i < nelt; ++i)
47611 dcopy.perm[i] ^= nelt;
/* Rebase indices by MIN and check whether the result is already the
   identity (alignment alone does the job).  */
47615 for (i = 0; i < nelt; ++i)
47617 unsigned e = dcopy.perm[i];
47618 if (GET_MODE_SIZE (d->vmode) == 32
47620 && (e & (nelt / 2 - 1)) < min)
47621 e = e - min - (nelt / 2);
47628 dcopy.one_operand_p = true;
47630 if (single_insn_only_p && !in_order)
47633 /* For AVX2, test whether we can permute the result in one instruction. */
47638 dcopy.op1 = dcopy.op0;
47639 return expand_vec_perm_1 (&dcopy);
/* Emit the palignr itself; shift is MIN elements in bits.  */
47642 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
47643 if (GET_MODE_SIZE (d->vmode) == 16)
47645 target = gen_reg_rtx (TImode);
47646 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
47647 gen_lowpart (TImode, dcopy.op0), shift));
47651 target = gen_reg_rtx (V2TImode);
47652 emit_insn (gen_avx2_palignrv2ti (target,
47653 gen_lowpart (V2TImode, dcopy.op1),
47654 gen_lowpart (V2TImode, dcopy.op0),
47658 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
47660 /* Test for the degenerate case where the alignment by itself
47661 produces the desired permutation. */
47664 emit_move_insn (d->target, dcopy.op0);
/* Otherwise finish with a single-operand permutation of the aligned
   value; guaranteed to succeed for 16-byte vectors.  */
47668 ok = expand_vec_perm_1 (&dcopy);
47669 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
47674 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
47675 the permutation using the SSE4_1 pblendv instruction. Potentially
47676 reduces permutation from 2 pshufb and or to 1 pshufb and pblendv. */
/* Strategy: first permute the out-of-place elements (which must all
   come from one operand) into position with a one-operand shuffle,
   then blend that result with the other operand.  */
47679 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
47681 unsigned i, which, nelt = d->nelt;
47682 struct expand_vec_perm_d dcopy, dcopy1;
47683 machine_mode vmode = d->vmode;
47686 /* Use the same checks as in expand_vec_perm_blend. */
47687 if (d->one_operand_p)
47689 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
47691 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
47693 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
47698 /* Figure out where permutation elements stay not in their
47699 respective lanes. */
/* WHICH accumulates bit 1 for displaced op0 elements, bit 2 for
   displaced op1 elements.  */
47700 for (i = 0, which = 0; i < nelt; ++i)
47702 unsigned e = d->perm[i];
47704 which |= (e < nelt ? 1 : 2);
47706 /* We can pblend the part where elements stay not in their
47707 respective lanes only when these elements are all in one
47708 half of a permutation.
47709 {0 1 8 3 4 5 9 7} is ok as 8, 9 are at not at their respective
47710 lanes, but both 8 and 9 >= 8
47711 {0 1 8 3 4 5 2 7} is not ok as 2 and 8 are not at their
47712 respective lanes and 8 >= 8, but 2 not. */
47713 if (which != 1 && which != 2)
47715 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
47718 /* First we apply one operand permutation to the part where
47719 elements stay not in their respective lanes. */
47722 dcopy.op0 = dcopy.op1 = d->op1;
47724 dcopy.op0 = dcopy.op1 = d->op0;
47726 dcopy.target = gen_reg_rtx (vmode);
47727 dcopy.one_operand_p = true;
47729 for (i = 0; i < nelt; ++i)
47730 dcopy.perm[i] = d->perm[i] & (nelt - 1);
47732 ok = expand_vec_perm_1 (&dcopy);
47733 if (GET_MODE_SIZE (vmode) != 16 && !ok)
47740 /* Next we put permuted elements into their positions. */
47743 dcopy1.op1 = dcopy.target;
47745 dcopy1.op0 = dcopy.target;
/* The second step is a pure blend: element i from the untouched
   operand, or slot i of the shuffled temporary.  */
47747 for (i = 0; i < nelt; ++i)
47748 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
47750 ok = expand_vec_perm_blend (&dcopy1);
47756 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
47758 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47759 a two vector permutation into a single vector permutation by using
47760 an interleave operation to merge the vectors. */
/* Two-step plan: DREMAP merges the used halves/quarters of the two
   operands into one vector (interleave, DImode shuffle, or
   vperm2f128), REMAP records where each original element lands, and
   DFINAL is the remaining single-operand permutation.  Emission is
   attempted on a scratch sequence first so it can be abandoned.  */
47763 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
47765 struct expand_vec_perm_d dremap, dfinal;
47766 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
47767 unsigned HOST_WIDE_INT contents;
47768 unsigned char remap[2 * MAX_VECT_LEN];
47770 bool ok, same_halves = false;
47772 if (GET_MODE_SIZE (d->vmode) == 16)
47774 if (d->one_operand_p)
47777 else if (GET_MODE_SIZE (d->vmode) == 32)
47781 /* For 32-byte modes allow even d->one_operand_p.
47782 The lack of cross-lane shuffling in some instructions
47783 might prevent a single insn shuffle. */
47785 dfinal.testing_p = true;
47786 /* If expand_vec_perm_interleave3 can expand this into
47787 a 3 insn sequence, give up and let it be expanded as
47788 3 insn sequence. While that is one insn longer,
47789 it doesn't need a memory operand and in the common
47790 case that both interleave low and high permutations
47791 with the same operands are adjacent needs 4 insns
47792 for both after CSE. */
47793 if (expand_vec_perm_interleave3 (&dfinal))
47799 /* Examine from whence the elements come. */
/* CONTENTS is a 2*nelt-bit set of which source indices are used.  */
47801 for (i = 0; i < nelt; ++i)
47802 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
47804 memset (remap, 0xff, sizeof (remap));
47807 if (GET_MODE_SIZE (d->vmode) == 16)
47809 unsigned HOST_WIDE_INT h1, h2, h3, h4;
47811 /* Split the two input vectors into 4 halves. */
47812 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
47817 /* If the elements from the low halves use interleave low, and similarly
47818 for interleave high. If the elements are from mis-matched halves, we
47819 can use shufps for V4SF/V4SI or do a DImode shuffle. */
47820 if ((contents & (h1 | h3)) == contents)
/* Interleave-low: lo(op0) with lo(op1).  */
47823 for (i = 0; i < nelt2; ++i)
47826 remap[i + nelt] = i * 2 + 1;
47827 dremap.perm[i * 2] = i;
47828 dremap.perm[i * 2 + 1] = i + nelt;
47830 if (!TARGET_SSE2 && d->vmode == V4SImode)
47831 dremap.vmode = V4SFmode;
47833 else if ((contents & (h2 | h4)) == contents)
/* Interleave-high: hi(op0) with hi(op1).  */
47836 for (i = 0; i < nelt2; ++i)
47838 remap[i + nelt2] = i * 2;
47839 remap[i + nelt + nelt2] = i * 2 + 1;
47840 dremap.perm[i * 2] = i + nelt2;
47841 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
47843 if (!TARGET_SSE2 && d->vmode == V4SImode)
47844 dremap.vmode = V4SFmode;
47846 else if ((contents & (h1 | h4)) == contents)
/* lo(op0) with hi(op1): a V2DI {0,3} shuffle.  */
47849 for (i = 0; i < nelt2; ++i)
47852 remap[i + nelt + nelt2] = i + nelt2;
47853 dremap.perm[i] = i;
47854 dremap.perm[i + nelt2] = i + nelt + nelt2;
47859 dremap.vmode = V2DImode;
47861 dremap.perm[0] = 0;
47862 dremap.perm[1] = 3;
47865 else if ((contents & (h2 | h3)) == contents)
/* hi(op0) with lo(op1): a V2DI {1,2} shuffle.  */
47868 for (i = 0; i < nelt2; ++i)
47870 remap[i + nelt2] = i;
47871 remap[i + nelt] = i + nelt2;
47872 dremap.perm[i] = i + nelt2;
47873 dremap.perm[i + nelt2] = i + nelt;
47878 dremap.vmode = V2DImode;
47880 dremap.perm[0] = 1;
47881 dremap.perm[1] = 2;
/* 32-byte case: analyze usage at quarter (nelt/4) granularity.  */
47889 unsigned int nelt4 = nelt / 4, nzcnt = 0;
47890 unsigned HOST_WIDE_INT q[8];
47891 unsigned int nonzero_halves[4];
47893 /* Split the two input vectors into 8 quarters. */
47894 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
47895 for (i = 1; i < 8; ++i)
47896 q[i] = q[0] << (nelt4 * i);
47897 for (i = 0; i < 4; ++i)
47898 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
47900 nonzero_halves[nzcnt] = i;
47906 gcc_assert (d->one_operand_p);
47907 nonzero_halves[1] = nonzero_halves[0];
47908 same_halves = true;
47910 else if (d->one_operand_p)
47912 gcc_assert (nonzero_halves[0] == 0);
47913 gcc_assert (nonzero_halves[1] == 1);
47918 if (d->perm[0] / nelt2 == nonzero_halves[1])
47920 /* Attempt to increase the likelihood that dfinal
47921 shuffle will be intra-lane. */
47922 char tmph = nonzero_halves[0];
47923 nonzero_halves[0] = nonzero_halves[1];
47924 nonzero_halves[1] = tmph;
47927 /* vperm2f128 or vperm2i128. */
47928 for (i = 0; i < nelt2; ++i)
47930 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
47931 remap[i + nonzero_halves[0] * nelt2] = i;
47932 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
47933 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
/* Modes without a vperm2[fi]128 pattern are punned to V8SI.  */
47936 if (d->vmode != V8SFmode
47937 && d->vmode != V4DFmode
47938 && d->vmode != V8SImode)
47940 dremap.vmode = V8SImode;
47942 for (i = 0; i < 4; ++i)
47944 dremap.perm[i] = i + nonzero_halves[0] * 4;
47945 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
47949 else if (d->one_operand_p)
47951 else if (TARGET_AVX2
47952 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
/* Only even quarters used: AVX2 interleave-low in both lanes.  */
47955 for (i = 0; i < nelt4; ++i)
47958 remap[i + nelt] = i * 2 + 1;
47959 remap[i + nelt2] = i * 2 + nelt2;
47960 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
47961 dremap.perm[i * 2] = i;
47962 dremap.perm[i * 2 + 1] = i + nelt;
47963 dremap.perm[i * 2 + nelt2] = i + nelt2;
47964 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
47967 else if (TARGET_AVX2
47968 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
/* Only odd quarters used: AVX2 interleave-high in both lanes.  */
47971 for (i = 0; i < nelt4; ++i)
47973 remap[i + nelt4] = i * 2;
47974 remap[i + nelt + nelt4] = i * 2 + 1;
47975 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
47976 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
47977 dremap.perm[i * 2] = i + nelt4;
47978 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
47979 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
47980 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
47987 /* Use the remapping array set up above to move the elements from their
47988 swizzled locations into their final destinations. */
47990 for (i = 0; i < nelt; ++i)
47992 unsigned e = remap[d->perm[i]];
47993 gcc_assert (e < nelt);
47994 /* If same_halves is true, both halves of the remapped vector are the
47995 same. Avoid cross-lane accesses if possible. */
47996 if (same_halves && i >= nelt2)
47998 gcc_assert (e < nelt2);
47999 dfinal.perm[i] = e + nelt2;
48002 dfinal.perm[i] = e;
48006 dremap.target = gen_reg_rtx (dremap.vmode);
48007 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
48009 dfinal.op1 = dfinal.op0;
48010 dfinal.one_operand_p = true;
48012 /* Test if the final remap can be done with a single insn. For V4SFmode or
48013 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
48015 ok = expand_vec_perm_1 (&dfinal);
48016 seq = get_insns ();
/* Finally emit the merging DREMAP step (punning operands if its
   working mode differs), then replay the saved DFINAL sequence.  */
48025 if (dremap.vmode != dfinal.vmode)
48027 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
48028 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
48031 ok = expand_vec_perm_1 (&dremap);
48038 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
48039 a single vector cross-lane permutation into vpermq followed
48040 by any of the single insn permutations. */
/* Works for one-operand V32QI/V16HI: DREMAP gathers the (at most two
   per destination half) used 64-bit quarters cross-lane with vpermq,
   DFINAL then permutes within lanes.  */
48043 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
48045 struct expand_vec_perm_d dremap, dfinal;
48046 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
48047 unsigned contents[2];
48051 && (d->vmode == V32QImode || d->vmode == V16HImode)
48052 && d->one_operand_p))
/* contents[k]: bitmask of which source quarters feed destination
   half k.  */
48057 for (i = 0; i < nelt2; ++i)
48059 contents[0] |= 1u << (d->perm[i] / nelt4);
48060 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
/* Each destination half may draw from at most two quarters, since
   vpermq can place only two quarters per half.  */
48063 for (i = 0; i < 2; ++i)
48065 unsigned int cnt = 0;
48066 for (j = 0; j < 4; ++j)
48067 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
/* Build the vpermq step.  */
48075 dremap.vmode = V4DImode;
48077 dremap.target = gen_reg_rtx (V4DImode);
48078 dremap.op0 = gen_lowpart (V4DImode, d->op0);
48079 dremap.op1 = dremap.op0;
48080 dremap.one_operand_p = true;
48081 for (i = 0; i < 2; ++i)
48083 unsigned int cnt = 0;
48084 for (j = 0; j < 4; ++j)
48085 if ((contents[i] & (1u << j)) != 0)
48086 dremap.perm[2 * i + cnt++] = j;
/* Pad unused slots with quarter 0.  */
48087 for (; cnt < 2; ++cnt)
48088 dremap.perm[2 * i + cnt] = 0;
/* Build the in-lane follow-up permutation on the vpermq result.  */
48092 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
48093 dfinal.op1 = dfinal.op0;
48094 dfinal.one_operand_p = true;
48095 for (i = 0, j = 0; i < nelt; ++i)
48099 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
48100 if ((d->perm[i] / nelt4) == dremap.perm[j])
48102 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
48103 dfinal.perm[i] |= nelt4;
48105 gcc_unreachable ();
48108 ok = expand_vec_perm_1 (&dremap);
48111 ok = expand_vec_perm_1 (&dfinal);
48117 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
48118 a vector permutation using two instructions, vperm2f128 resp.
48119 vperm2i128 followed by any single in-lane permutation. */
/* Enumerates all 32 candidate vperm2[fi]128 immediates (perm < 16
   pairs the result with d->op0, perm >= 16 with d->op1), skipping
   degenerate ones, and accepts the first whose residual shuffle
   DSECOND is a single insn.  */
48122 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
48124 struct expand_vec_perm_d dfirst, dsecond;
48125 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
48129 || GET_MODE_SIZE (d->vmode) != 32
48130 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2)
48134 dsecond.one_operand_p = false;
48135 dsecond.testing_p = true;
48137 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
48138 immediate. For perm < 16 the second permutation uses
48139 d->op0 as first operand, for perm >= 16 it uses d->op1
48140 as first operand. The second operand is the result of
48142 for (perm = 0; perm < 32; perm++)
48144 /* Ignore permutations which do not move anything cross-lane. */
48147 /* The second shuffle for e.g. V4DFmode has
48148 0123 and ABCD operands.
48149 Ignore AB23, as 23 is already in the second lane
48150 of the first operand. */
48151 if ((perm & 0xc) == (1 << 2)) continue;
48152 /* And 01CD, as 01 is in the first lane of the first
48154 if ((perm & 3) == 0) continue;
48155 /* And 4567, as then the vperm2[fi]128 doesn't change
48156 anything on the original 4567 second operand. */
48157 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
48161 /* The second shuffle for e.g. V4DFmode has
48162 4567 and ABCD operands.
48163 Ignore AB67, as 67 is already in the second lane
48164 of the first operand. */
48165 if ((perm & 0xc) == (3 << 2)) continue;
48166 /* And 45CD, as 45 is in the first lane of the first
48168 if ((perm & 3) == 2) continue;
48169 /* And 0123, as then the vperm2[fi]128 doesn't change
48170 anything on the original 0123 first operand. */
48171 if ((perm & 0xf) == (1 << 2)) continue;
/* Compute the residual permutation for this candidate immediate;
   bail to the next candidate if some element is unreachable.  */
48174 for (i = 0; i < nelt; i++)
48176 j = d->perm[i] / nelt2;
48177 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
48178 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
48179 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
48180 dsecond.perm[i] = d->perm[i] & (nelt - 1);
48188 ok = expand_vec_perm_1 (&dsecond);
48199 /* Found a usable second shuffle. dfirst will be
48200 vperm2f128 on d->op0 and d->op1. */
48201 dsecond.testing_p = false;
48203 dfirst.target = gen_reg_rtx (d->vmode);
48204 for (i = 0; i < nelt; i++)
48205 dfirst.perm[i] = (i & (nelt2 - 1))
48206 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
48208 canonicalize_perm (&dfirst);
48209 ok = expand_vec_perm_1 (&dfirst);
48212 /* And dsecond is some single insn shuffle, taking
48213 d->op0 and result of vperm2f128 (if perm < 16) or
48214 d->op1 and result of vperm2f128 (otherwise). */
48216 dsecond.op0 = dsecond.op1;
48217 dsecond.op1 = dfirst.target;
48219 ok = expand_vec_perm_1 (&dsecond);
48225 /* For one operand, the only useful vperm2f128 permutation is 0x01
48227 if (d->one_operand_p)
48234 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
48235 a two vector permutation using 2 intra-lane interleave insns
48236 and cross-lane shuffle for 32-byte vectors. */
/* NOTE(review): interior lines are elided in this excerpt (line numbers
   are non-contiguous); code kept byte-identical.  */
48239 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
48242 rtx (*gen) (rtx, rtx, rtx);
48244 if (d->one_operand_p)
48246 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
48248 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
/* Recognize the interleave pattern: perm[0] selects low (0) or high
   (nelt/2) half, and elements alternate between the two operands.  */
48254 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
48256 for (i = 0; i < nelt; i += 2)
48257 if (d->perm[i] != d->perm[0] + i / 2
48258 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
/* Per-mode dispatch to the matching high/low interleave generator
   (switch labels elided in this excerpt).  */
48268 gen = gen_vec_interleave_highv32qi;
48270 gen = gen_vec_interleave_lowv32qi;
48274 gen = gen_vec_interleave_highv16hi;
48276 gen = gen_vec_interleave_lowv16hi;
48280 gen = gen_vec_interleave_highv8si;
48282 gen = gen_vec_interleave_lowv8si;
48286 gen = gen_vec_interleave_highv4di;
48288 gen = gen_vec_interleave_lowv4di;
48292 gen = gen_vec_interleave_highv8sf;
48294 gen = gen_vec_interleave_lowv8sf;
48298 gen = gen_vec_interleave_highv4df;
48300 gen = gen_vec_interleave_lowv4df;
48303 gcc_unreachable ();
48306 emit_insn (gen (d->target, d->op0, d->op1));
48310 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
48311 a single vector permutation using a single intra-lane vector
48312 permutation, vperm2f128 swapping the lanes and vblend* insn blending
48313 the non-swapped and swapped vectors together. */
/* NOTE(review): interior lines are elided in this excerpt; code kept
   byte-identical.  Only handles V8SF/V4DF one-operand permutations.  */
48316 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
48318 struct expand_vec_perm_d dfirst, dsecond;
48319 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
48322 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
48326 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
48327 || !d->one_operand_p)
/* 0xff marks "slot not yet assigned" in the first (in-lane) perm.  */
48331 for (i = 0; i < nelt; i++)
48332 dfirst.perm[i] = 0xff;
/* Place each requested element into its own lane; conflicts mean this
   strategy cannot apply (failure path elided here).  msk accumulates
   the blend immediate.  */
48333 for (i = 0, msk = 0; i < nelt; i++)
48335 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
48336 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
48338 dfirst.perm[j] = d->perm[i];
/* Fill unassigned slots with the identity.  */
48342 for (i = 0; i < nelt; i++)
48343 if (dfirst.perm[i] == 0xff)
48344 dfirst.perm[i] = i;
48347 dfirst.target = gen_reg_rtx (dfirst.vmode);
48350 ok = expand_vec_perm_1 (&dfirst);
48351 seq = get_insns ();
/* Second step: swap the two 128-bit lanes of the first result.  */
48363 dsecond.op0 = dfirst.target;
48364 dsecond.op1 = dfirst.target;
48365 dsecond.one_operand_p = true;
48366 dsecond.target = gen_reg_rtx (dsecond.vmode);
48367 for (i = 0; i < nelt; i++)
48368 dsecond.perm[i] = i ^ nelt2;
48370 ok = expand_vec_perm_1 (&dsecond);
/* Blend the unswapped and swapped vectors under mask MSK.  */
48373 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
48374 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
48378 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
48379 permutation using two vperm2f128, followed by a vshufpd insn blending
48380 the two vectors together. */
/* NOTE(review): interior lines are elided in this excerpt; code kept
   byte-identical.  V4DF + AVX only.  */
48383 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
48385 struct expand_vec_perm_d dfirst, dsecond, dthird;
48388 if (!TARGET_AVX || (d->vmode != V4DFmode))
/* dfirst/dsecond gather the even-indexed pairs containing each requested
   element (index rounded down to even, then the pair); dthird selects
   within those pairs via the low bit.  */
48398 dfirst.perm[0] = (d->perm[0] & ~1);
48399 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
48400 dfirst.perm[2] = (d->perm[2] & ~1);
48401 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
48402 dsecond.perm[0] = (d->perm[1] & ~1);
48403 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
48404 dsecond.perm[2] = (d->perm[3] & ~1);
48405 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
48406 dthird.perm[0] = (d->perm[0] % 2);
48407 dthird.perm[1] = (d->perm[1] % 2) + 4;
48408 dthird.perm[2] = (d->perm[2] % 2) + 2;
48409 dthird.perm[3] = (d->perm[3] % 2) + 6;
48411 dfirst.target = gen_reg_rtx (dfirst.vmode);
48412 dsecond.target = gen_reg_rtx (dsecond.vmode);
48413 dthird.op0 = dfirst.target;
48414 dthird.op1 = dsecond.target;
48415 dthird.one_operand_p = false;
48417 canonicalize_perm (&dfirst);
48418 canonicalize_perm (&dsecond);
/* All three sub-permutations must be single-insn expressible.  */
48420 ok = expand_vec_perm_1 (&dfirst)
48421 && expand_vec_perm_1 (&dsecond)
48422 && expand_vec_perm_1 (&dthird);
48429 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
48430 permutation with two pshufb insns and an ior. We should have already
48431 failed all two instruction sequences. */
/* NOTE(review): interior lines are elided in this excerpt; code kept
   byte-identical.  Requires SSSE3 and a 16-byte vector mode.  */
48434 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
48436 rtx rperm[2][16], vperm, l, h, op, m128;
48437 unsigned int i, nelt, eltsz;
48439 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
48441 gcc_assert (!d->one_operand_p);
48447 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48449 /* Generate two permutation masks. If the required element is within
48450 the given vector it is shuffled into the proper lane. If the required
48451 element is in the other vector, force a zero into the lane by setting
48452 bit 7 in the permutation mask. */
48453 m128 = GEN_INT (-128);
48454 for (i = 0; i < nelt; ++i)
48456 unsigned j, e = d->perm[i];
48457 unsigned which = (e >= nelt);
48461 for (j = 0; j < eltsz; ++j)
48463 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
48464 rperm[1-which][i*eltsz + j] = m128;
/* pshufb of op0 with the first mask -> L.  */
48468 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
48469 vperm = force_reg (V16QImode, vperm);
48471 l = gen_reg_rtx (V16QImode);
48472 op = gen_lowpart (V16QImode, d->op0);
48473 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
/* pshufb of op1 with the second mask -> H.  */
48475 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
48476 vperm = force_reg (V16QImode, vperm);
48478 h = gen_reg_rtx (V16QImode);
48479 op = gen_lowpart (V16QImode, d->op1);
48480 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
/* OR the two halves; the zeroed lanes make this a merge.  */
48483 if (d->vmode != V16QImode)
48484 op = gen_reg_rtx (V16QImode);
48485 emit_insn (gen_iorv16qi3 (op, l, h));
48486 if (op != d->target)
48487 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48492 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
48493 with two vpshufb insns, vpermq and vpor. We should have already failed
48494 all two or three instruction sequences. */
/* NOTE(review): interior lines are elided in this excerpt; code kept
   byte-identical.  One-operand V32QI/V16HI only.  */
48497 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
48499 rtx rperm[2][32], vperm, l, h, hp, op, m128;
48500 unsigned int i, nelt, eltsz;
48503 || !d->one_operand_p
48504 || (d->vmode != V32QImode && d->vmode != V16HImode))
48511 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48513 /* Generate two permutation masks. If the required element is within
48514 the same lane, it is shuffled in. If the required element from the
48515 other lane, force a zero by setting bit 7 in the permutation mask.
48516 In the other mask the mask has non-negative elements if element
48517 is requested from the other lane, but also moved to the other lane,
48518 so that the result of vpshufb can have the two V2TImode halves
48520 m128 = GEN_INT (-128);
48521 for (i = 0; i < nelt; ++i)
48523 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
/* Nonzero WHICH means the element crosses the 128-bit lane.  */
48524 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48526 for (j = 0; j < eltsz; ++j)
48528 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
48529 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
/* vpshufb for the cross-lane elements -> H.  */
48533 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48534 vperm = force_reg (V32QImode, vperm);
48536 h = gen_reg_rtx (V32QImode);
48537 op = gen_lowpart (V32QImode, d->op0);
48538 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48540 /* Swap the 128-byte lanes of h into hp. */
48541 hp = gen_reg_rtx (V4DImode);
48542 op = gen_lowpart (V4DImode, h);
48543 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
/* vpshufb for the in-lane elements -> L.  */
48546 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48547 vperm = force_reg (V32QImode, vperm);
48549 l = gen_reg_rtx (V32QImode);
48550 op = gen_lowpart (V32QImode, d->op0);
48551 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
/* Merge the lane-swapped and in-lane results with vpor.  */
48554 if (d->vmode != V32QImode)
48555 op = gen_reg_rtx (V32QImode);
48556 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
48557 if (op != d->target)
48558 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48563 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48564 and extract-odd permutations of two V32QImode and V16QImode operand
48565 with two vpshufb insns, vpor and vpermq. We should have already
48566 failed all two or three instruction sequences. */
/* NOTE(review): interior lines are elided in this excerpt; code kept
   byte-identical.  Two-operand V32QI/V16HI only.  */
48569 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
48571 rtx rperm[2][32], vperm, l, h, ior, op, m128;
48572 unsigned int i, nelt, eltsz;
48575 || d->one_operand_p
48576 || (d->vmode != V32QImode && d->vmode != V16HImode)
/* Verify the permutation really is extract-even/odd modulo lanes.  */
48579 for (i = 0; i < d->nelt; ++i)
48580 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
48587 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48589 /* Generate two permutation masks. In the first permutation mask
48590 the first quarter will contain indexes for the first half
48591 of the op0, the second quarter will contain bit 7 set, third quarter
48592 will contain indexes for the second half of the op0 and the
48593 last quarter bit 7 set. In the second permutation mask
48594 the first quarter will contain bit 7 set, the second quarter
48595 indexes for the first half of the op1, the third quarter bit 7 set
48596 and last quarter indexes for the second half of the op1.
48597 I.e. the first mask e.g. for V32QImode extract even will be:
48598 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
48599 (all values masked with 0xf except for -128) and second mask
48600 for extract even will be
48601 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
48602 m128 = GEN_INT (-128);
48603 for (i = 0; i < nelt; ++i)
48605 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48606 unsigned which = d->perm[i] >= nelt;
48607 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
48609 for (j = 0; j < eltsz; ++j)
48611 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
48612 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
/* vpshufb op0 with the first mask -> L.  */
48616 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48617 vperm = force_reg (V32QImode, vperm);
48619 l = gen_reg_rtx (V32QImode);
48620 op = gen_lowpart (V32QImode, d->op0);
48621 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
/* vpshufb op1 with the second mask -> H.  */
48623 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48624 vperm = force_reg (V32QImode, vperm);
48626 h = gen_reg_rtx (V32QImode);
48627 op = gen_lowpart (V32QImode, d->op1);
48628 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48630 ior = gen_reg_rtx (V32QImode);
48631 emit_insn (gen_iorv32qi3 (ior, l, h));
48633 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
48634 op = gen_reg_rtx (V4DImode);
48635 ior = gen_lowpart (V4DImode, ior);
48636 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
48637 const1_rtx, GEN_INT (3)));
48638 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48643 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48644 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
48645 with two "and" and "pack" or two "shift" and "pack" insns. We should
48646 have already failed all two instruction sequences. */
/* NOTE(review): interior lines (switch labels, returns) are elided in
   this excerpt; code kept byte-identical.  Extract-even uses and+pack,
   extract-odd uses shift+pack.  */
48649 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
48651 rtx op, dop0, dop1, t, rperm[16];
48652 unsigned i, odd, c, s, nelt = d->nelt;
48653 bool end_perm = false;
48654 machine_mode half_mode;
48655 rtx (*gen_and) (rtx, rtx, rtx);
48656 rtx (*gen_pack) (rtx, rtx, rtx);
48657 rtx (*gen_shift) (rtx, rtx, rtx);
48659 if (d->one_operand_p)
/* Per-mode selection of the and/pack/shift generators.  */
48665 /* Required for "pack". */
48666 if (!TARGET_SSE4_1)
48670 half_mode = V4SImode;
48671 gen_and = gen_andv4si3;
48672 gen_pack = gen_sse4_1_packusdw;
48673 gen_shift = gen_lshrv4si3;
48676 /* No check as all instructions are SSE2. */
48679 half_mode = V8HImode;
48680 gen_and = gen_andv8hi3;
48681 gen_pack = gen_sse2_packuswb;
48682 gen_shift = gen_lshrv8hi3;
48689 half_mode = V8SImode;
48690 gen_and = gen_andv8si3;
48691 gen_pack = gen_avx2_packusdw;
48692 gen_shift = gen_lshrv8si3;
48700 half_mode = V16HImode;
48701 gen_and = gen_andv16hi3;
48702 gen_pack = gen_avx2_packuswb;
48703 gen_shift = gen_lshrv16hi3;
48707 /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
48708 general shuffles. */
48712 /* Check that permutation is even or odd. */
48717 for (i = 1; i < nelt; ++i)
48718 if (d->perm[i] != 2 * i + odd)
48724 dop0 = gen_reg_rtx (half_mode);
48725 dop1 = gen_reg_rtx (half_mode);
/* Even: mask out the high half of each wide element with AND.  */
48728 for (i = 0; i < nelt / 2; i++)
48729 rperm[i] = GEN_INT (c);
48730 t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
48731 t = force_reg (half_mode, t);
48732 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
48733 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
/* Odd: shift the odd elements down instead.  */
48737 emit_insn (gen_shift (dop0,
48738 gen_lowpart (half_mode, d->op0),
48740 emit_insn (gen_shift (dop1,
48741 gen_lowpart (half_mode, d->op1),
48744 /* In AVX2 for 256 bit case we need to permute pack result. */
48745 if (TARGET_AVX2 && end_perm)
48747 op = gen_reg_rtx (d->vmode);
48748 t = gen_reg_rtx (V4DImode);
48749 emit_insn (gen_pack (op, dop0, dop1));
48750 emit_insn (gen_avx2_permv4di_1 (t,
48751 gen_lowpart (V4DImode, op),
48756 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
48759 emit_insn (gen_pack (d->target, dop0, dop1));
48764 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
48765 and extract-odd permutations. */
/* NOTE(review): this is a large per-mode switch whose case labels and
   returns are elided in this excerpt; code kept byte-identical.  ODD
   selects extract-odd (1) vs extract-even (0).  */
48768 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
48770 rtx t1, t2, t3, t4, t5;
/* V4DF path: two vperm2f128 lane shuffles, then unpck[lh]pd.  */
48777 t1 = gen_reg_rtx (V4DFmode);
48778 t2 = gen_reg_rtx (V4DFmode);
48780 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48781 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
48782 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
48784 /* Now an unpck[lh]pd will produce the result required. */
48786 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
48788 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
/* V8SF path: shufps/vperm2f128 sequence.  */
48794 int mask = odd ? 0xdd : 0x88;
48798 t1 = gen_reg_rtx (V8SFmode);
48799 t2 = gen_reg_rtx (V8SFmode);
48800 t3 = gen_reg_rtx (V8SFmode);
48802 /* Shuffle within the 128-bit lanes to produce:
48803 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
48804 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
48807 /* Shuffle the lanes around to produce:
48808 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
48809 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
48812 /* Shuffle within the 128-bit lanes to produce:
48813 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
48814 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
48816 /* Shuffle within the 128-bit lanes to produce:
48817 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
48818 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
48820 /* Shuffle the lanes around to produce:
48821 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
48822 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
48831 /* These are always directly implementable by expand_vec_perm_1. */
48832 gcc_unreachable ();
/* Narrow SSE modes: prefer pack, else two pshufb + ior.  */
48836 return expand_vec_perm_even_odd_pack (d);
48837 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
48838 return expand_vec_perm_pshufb2 (d);
48843 /* We need 2*log2(N)-1 operations to achieve odd/even
48844 with interleave. */
48845 t1 = gen_reg_rtx (V8HImode);
48846 t2 = gen_reg_rtx (V8HImode);
48847 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
48848 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
48849 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
48850 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
48852 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
48854 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
48860 return expand_vec_perm_even_odd_pack (d);
48864 return expand_vec_perm_even_odd_pack (d);
/* V4DI: reuse the V4DF expansion via lowpart punning.  */
48869 struct expand_vec_perm_d d_copy = *d;
48870 d_copy.vmode = V4DFmode;
48872 d_copy.target = gen_lowpart (V4DFmode, d->target);
48874 d_copy.target = gen_reg_rtx (V4DFmode);
48875 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
48876 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
48877 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48880 emit_move_insn (d->target,
48881 gen_lowpart (V4DImode, d_copy.target));
48890 t1 = gen_reg_rtx (V4DImode);
48891 t2 = gen_reg_rtx (V4DImode);
48893 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
48894 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
48895 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
48897 /* Now an vpunpck[lh]qdq will produce the result required. */
48899 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
48901 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
/* V8SI: reuse the V8SF expansion via lowpart punning.  */
48908 struct expand_vec_perm_d d_copy = *d;
48909 d_copy.vmode = V8SFmode;
48911 d_copy.target = gen_lowpart (V8SFmode, d->target);
48913 d_copy.target = gen_reg_rtx (V8SFmode);
48914 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
48915 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
48916 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
48919 emit_move_insn (d->target,
48920 gen_lowpart (V8SImode, d_copy.target));
48929 t1 = gen_reg_rtx (V8SImode);
48930 t2 = gen_reg_rtx (V8SImode);
48931 t3 = gen_reg_rtx (V4DImode);
48932 t4 = gen_reg_rtx (V4DImode);
48933 t5 = gen_reg_rtx (V4DImode);
48935 /* Shuffle the lanes around into
48936 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
48937 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
48938 gen_lowpart (V4DImode, d->op1),
48940 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
48941 gen_lowpart (V4DImode, d->op1),
48944 /* Swap the 2nd and 3rd position in each lane into
48945 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
48946 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
48947 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48948 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
48949 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
48951 /* Now an vpunpck[lh]qdq will produce
48952 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
48954 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
48955 gen_lowpart (V4DImode, t2));
48957 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
48958 gen_lowpart (V4DImode, t2));
48960 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
48964 gcc_unreachable ();
48970 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
48971 extract-even and extract-odd permutations. */
/* Recognizes perm[i] == 2*i + odd for odd in {0,1} and delegates to
   expand_vec_perm_even_odd_1.  (Some lines elided in this excerpt.)  */
48974 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
48976 unsigned i, odd, nelt = d->nelt;
48979 if (odd != 0 && odd != 1)
48982 for (i = 1; i < nelt; ++i)
48983 if (d->perm[i] != 2 * i + odd)
48986 return expand_vec_perm_even_odd_1 (d, odd);
48989 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
48990 permutations. We assume that expand_vec_perm_1 has already failed. */
/* NOTE(review): per-mode switch with case labels elided in this excerpt;
   code kept byte-identical.  */
48993 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
48995 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
48996 machine_mode vmode = d->vmode;
48997 unsigned char perm2[4];
48998 rtx op0 = d->op0, dest;
49005 /* These are special-cased in sse.md so that we can optionally
49006 use the vbroadcast instruction. They expand to two insns
49007 if the input happens to be in a register. */
49008 gcc_unreachable ();
49014 /* These are always implementable using standard shuffle patterns. */
49015 gcc_unreachable ();
49019 /* These can be implemented via interleave. We save one insn by
49020 stopping once we have promoted to V4SImode and then use pshufd. */
49026 rtx (*gen) (rtx, rtx, rtx)
49027 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
49028 : gen_vec_interleave_lowv8hi;
49032 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
49033 : gen_vec_interleave_highv8hi;
/* Self-interleave repeatedly, widening the element each round,
   until we reach V4SImode.  */
49038 dest = gen_reg_rtx (vmode);
49039 emit_insn (gen (dest, op0, op0));
49040 vmode = get_mode_wider_vector (vmode);
49041 op0 = gen_lowpart (vmode, dest);
49043 while (vmode != V4SImode);
/* Finish with a pshufd-style vselect of the broadcast element.  */
49045 memset (perm2, elt, 4);
49046 dest = gen_reg_rtx (V4SImode);
49047 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
49050 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
49058 /* For AVX2 broadcasts of the first element vpbroadcast* or
49059 vpermq should be used by expand_vec_perm_1. */
49060 gcc_assert (!TARGET_AVX2 || d->perm[0]);
49064 gcc_unreachable ();
49068 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
49069 broadcast permutations. */
/* Recognizes a two-operand-free permutation where every index equals
   perm[0], then delegates to expand_vec_perm_broadcast_1.  (Some lines
   elided in this excerpt.)  */
49072 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
49074 unsigned i, elt, nelt = d->nelt;
49076 if (!d->one_operand_p)
49080 for (i = 1; i < nelt; ++i)
49081 if (d->perm[i] != elt)
49084 return expand_vec_perm_broadcast_1 (d);
49087 /* Implement arbitrary permutations of two V64QImode operands
49088 with 2 vpermi2w, 2 vpshufb and one vpor instruction. */
/* NOTE(review): interior lines are elided in this excerpt; code kept
   byte-identical.  AVX512BW + V64QImode only.  */
49090 expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
49092 if (!TARGET_AVX512BW || !(d->vmode == V64QImode)
49098 struct expand_vec_perm_d ds[2];
49099 rtx rperm[128], vperm, target0, target1;
49100 unsigned int i, nelt;
49101 machine_mode vmode;
/* Two word-level (V32HI) sub-permutations, one per byte parity.  */
49106 for (i = 0; i < 2; i++)
49109 ds[i].vmode = V32HImode;
49111 ds[i].target = gen_reg_rtx (V32HImode);
49112 ds[i].op0 = gen_lowpart (V32HImode, d->op0);
49113 ds[i].op1 = gen_lowpart (V32HImode, d->op1);
49116 /* Prepare permutations such that the first one takes care of
49117 putting the even bytes into the right positions or one higher
49118 positions (ds[0]) and the second one takes care of
49119 putting the odd bytes into the right positions or one below
49122 for (i = 0; i < nelt; i++)
49124 ds[i & 1].perm[i / 2] = d->perm[i] / 2;
49127 rperm[i] = constm1_rtx;
49128 rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49132 rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49133 rperm[i + 64] = constm1_rtx;
/* Expand both word permutations (expected to be single vpermi2w).  */
49137 bool ok = expand_vec_perm_1 (&ds[0]);
49139 ds[0].target = gen_lowpart (V64QImode, ds[0].target);
49141 ok = expand_vec_perm_1 (&ds[1]);
49143 ds[1].target = gen_lowpart (V64QImode, ds[1].target);
/* Byte-fix each half with vpshufb, then merge with vpor.  */
49145 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
49146 vperm = force_reg (vmode, vperm);
49147 target0 = gen_reg_rtx (V64QImode);
49148 emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
49150 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
49151 vperm = force_reg (vmode, vperm);
49152 target1 = gen_reg_rtx (V64QImode);
49153 emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
49155 emit_insn (gen_iorv64qi3 (d->target, target0, target1));
49159 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
49160 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
49161 all the shorter instruction sequences. */
/* NOTE(review): interior lines are elided in this excerpt; code kept
   byte-identical.  Two-operand V32QI/V16HI only.  */
49164 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
49166 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
49167 unsigned int i, nelt, eltsz;
49171 || d->one_operand_p
49172 || (d->vmode != V32QImode && d->vmode != V16HImode)
49179 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
49181 /* Generate 4 permutation masks. If the required element is within
49182 the same lane, it is shuffled in. If the required element from the
49183 other lane, force a zero by setting bit 7 in the permutation mask.
49184 In the other mask the mask has non-negative elements if element
49185 is requested from the other lane, but also moved to the other lane,
49186 so that the result of vpshufb can have the two V2TImode halves
49188 m128 = GEN_INT (-128);
/* Start all four masks fully zeroed (-128 in every byte).  */
49189 for (i = 0; i < 32; ++i)
49191 rperm[0][i] = m128;
49192 rperm[1][i] = m128;
49193 rperm[2][i] = m128;
49194 rperm[3][i] = m128;
/* WHICH indexes one of the four masks: operand (op0/op1) x lane
   (in-lane / cross-lane); USED[] records which masks are needed.  */
49200 for (i = 0; i < nelt; ++i)
49202 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
49203 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
49204 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
49206 for (j = 0; j < eltsz; ++j)
49207 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
49208 used[which] = true;
/* Emit vpshufb for each needed cross-lane mask -> h[i].  */
49211 for (i = 0; i < 2; ++i)
49213 if (!used[2 * i + 1])
49218 vperm = gen_rtx_CONST_VECTOR (V32QImode,
49219 gen_rtvec_v (32, rperm[2 * i + 1]));
49220 vperm = force_reg (V32QImode, vperm);
49221 h[i] = gen_reg_rtx (V32QImode);
49222 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49223 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
49226 /* Swap the 128-byte lanes of h[X]. */
49227 for (i = 0; i < 2; ++i)
49229 if (h[i] == NULL_RTX)
49231 op = gen_reg_rtx (V4DImode);
49232 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
49233 const2_rtx, GEN_INT (3), const0_rtx,
49235 h[i] = gen_lowpart (V32QImode, op);
/* Emit vpshufb for each needed in-lane mask -> l[i].  */
49238 for (i = 0; i < 2; ++i)
49245 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
49246 vperm = force_reg (V32QImode, vperm);
49247 l[i] = gen_reg_rtx (V32QImode);
49248 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49249 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
/* Merge in-lane and cross-lane pieces per operand, then both operands. */
49252 for (i = 0; i < 2; ++i)
49256 op = gen_reg_rtx (V32QImode);
49257 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
49264 gcc_assert (l[0] && l[1]);
49266 if (d->vmode != V32QImode)
49267 op = gen_reg_rtx (V32QImode);
49268 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
49269 if (op != d->target)
49270 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
49274 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
49275 With all of the interface bits taken care of, perform the expansion
49276 in D and return true on success. */
/* Strategies are tried strictly from cheapest (one insn) to most
   expensive; the first that succeeds wins.  (Return-true lines between
   the calls are elided in this excerpt.)  */
49279 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
49281 /* Try a single instruction expansion. */
49282 if (expand_vec_perm_1 (d))
49285 /* Try sequences of two instructions. */
49287 if (expand_vec_perm_pshuflw_pshufhw (d))
49290 if (expand_vec_perm_palignr (d, false))
49293 if (expand_vec_perm_interleave2 (d))
49296 if (expand_vec_perm_broadcast (d))
49299 if (expand_vec_perm_vpermq_perm_1 (d))
49302 if (expand_vec_perm_vperm2f128 (d))
49305 if (expand_vec_perm_pblendv (d))
49308 /* Try sequences of three instructions. */
49310 if (expand_vec_perm_even_odd_pack (d))
49313 if (expand_vec_perm_2vperm2f128_vshuf (d))
49316 if (expand_vec_perm_pshufb2 (d))
49319 if (expand_vec_perm_interleave3 (d))
49322 if (expand_vec_perm_vperm2f128_vblend (d))
49325 /* Try sequences of four instructions. */
49327 if (expand_vec_perm_vpshufb2_vpermq (d))
49330 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
49333 if (expand_vec_perm_vpermi2_vpshub2 (d))
49336 /* ??? Look for narrow permutations whose element orderings would
49337 allow the promotion to a wider mode. */
49339 /* ??? Look for sequences of interleave or a wider permute that place
49340 the data into the correct lanes for a half-vector shuffle like
49341 pshuf[lh]w or vpermilps. */
49343 /* ??? Look for sequences of interleave that produce the desired results.
49344 The combinatorics of punpck[lh] get pretty ugly... */
49346 if (expand_vec_perm_even_odd (d))
49349 /* Even longer sequences. */
49350 if (expand_vec_perm_vpshufb4_vpermq2 (d))
49356 /* If a permutation only uses one operand, make it clear. Returns true
49357 if the permutation references both operands. */
/* WHICH is a bitmask: bit 0 = references op0, bit 1 = references op1.
   (Some branch lines are elided in this excerpt.)  */
49360 canonicalize_perm (struct expand_vec_perm_d *d)
49362 int i, which, nelt = d->nelt;
49364 for (i = which = 0; i < nelt; ++i)
49365 which |= (d->perm[i] < nelt ? 1 : 2);
49367 d->one_operand_p = true;
49374 if (!rtx_equal_p (d->op0, d->op1))
49376 d->one_operand_p = false;
49379 /* The elements of PERM do not suggest that only the first operand
49380 is used, but both operands are identical. Allow easier matching
49381 of the permutation by folding the permutation into the single
/* Fold indices into the 0..nelt-1 range for a one-operand match.  */
49386 for (i = 0; i < nelt; ++i)
49387 d->perm[i] &= nelt - 1;
49396 return (which == 3);
/* Expand a constant vector permutation from the expander operands:
   operands[0] = target, [1] = op0, [2] = op1, [3] = constant selector.
   Presumably returns true on success — return lines are elided in this
   excerpt.  */
49400 ix86_expand_vec_perm_const (rtx operands[4])
49402 struct expand_vec_perm_d d;
49403 unsigned char perm[MAX_VECT_LEN];
49408 d.target = operands[0];
49409 d.op0 = operands[1];
49410 d.op1 = operands[2];
49413 d.vmode = GET_MODE (d.target);
49414 gcc_assert (VECTOR_MODE_P (d.vmode));
49415 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49416 d.testing_p = false;
49418 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
49419 gcc_assert (XVECLEN (sel, 0) == nelt);
49420 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
/* Copy the selector, wrapping indices into 0..2*nelt-1; PERM keeps a
   pristine copy for the retry below.  */
49422 for (i = 0; i < nelt; ++i)
49424 rtx e = XVECEXP (sel, 0, i);
49425 int ei = INTVAL (e) & (2 * nelt - 1);
49430 two_args = canonicalize_perm (&d);
49432 if (ix86_expand_vec_perm_const_1 (&d))
49435 /* If the selector says both arguments are needed, but the operands are the
49436 same, the above tried to expand with one_operand_p and flattened selector.
49437 If that didn't work, retry without one_operand_p; we succeeded with that
49439 if (two_args && d.one_operand_p)
49441 d.one_operand_p = false;
49442 memcpy (d.perm, perm, sizeof (perm));
49443 return ix86_expand_vec_perm_const_1 (&d);
49449 /* Implement targetm.vectorize.vec_perm_const_ok. */
/* Predicate-only variant: decides whether SEL is expandable for VMODE
   without emitting insns.  Many switch labels / early returns are elided
   in this excerpt; code kept byte-identical.  */
49452 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
49453 const unsigned char *sel)
49455 struct expand_vec_perm_d d;
49456 unsigned int i, nelt, which;
49460 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49461 d.testing_p = true;
49463 /* Given sufficient ISA support we can just return true here
49464 for selected vector modes. */
49471 if (TARGET_AVX512F)
49472 /* All implementable with a single vpermi2 insn. */
49476 if (TARGET_AVX512BW)
49477 /* All implementable with a single vpermi2 insn. */
49481 if (TARGET_AVX512BW)
49482 /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */
49489 if (TARGET_AVX512VL)
49490 /* All implementable with a single vpermi2 insn. */
49495 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49500 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49507 /* All implementable with a single vpperm insn. */
49510 /* All implementable with 2 pshufb + 1 ior. */
49516 /* All implementable with shufpd or unpck[lh]pd. */
49522 /* Extract the values from the vector CST into the permutation
49524 memcpy (d.perm, sel, nelt);
49525 for (i = which = 0; i < nelt; ++i)
49527 unsigned char e = d.perm[i];
49528 gcc_assert (e < 2 * nelt);
49529 which |= (e < nelt ? 1 : 2);
49532 /* For all elements from second vector, fold the elements to first. */
49534 for (i = 0; i < nelt; ++i)
49537 /* Check whether the mask can be applied to the vector type. */
49538 d.one_operand_p = (which != 3);
49540 /* Implementable with shufps or pshufd. */
49541 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
49544 /* Otherwise we have to go through the motions and see if we can
49545 figure out how to generate the requested permutation. */
/* Raw virtual registers stand in for real operands during testing.  */
49546 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
49547 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
49548 if (!d.one_operand_p)
49549 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
49552 ret = ix86_expand_vec_perm_const_1 (&d);
/* Emit insns extracting the even (ODD == 0) or odd (ODD == 1) elements
   of OP0/OP1 into TARG.  (Some lines, including the operand assignments,
   are elided in this excerpt.)  */
49559 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
49561 struct expand_vec_perm_d d;
49567 d.vmode = GET_MODE (targ);
49568 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49569 d.one_operand_p = false;
49570 d.testing_p = false;
49572 for (i = 0; i < nelt; ++i)
49573 d.perm[i] = i * 2 + odd;
49575 /* We'll either be able to implement the permutation directly... */
49576 if (expand_vec_perm_1 (&d))
49579 /* ... or we use the special-case patterns. */
49580 expand_vec_perm_even_odd_1 (&d, odd);
/* Expand an interleave of OP0 and OP1 into TARG: the low halves when
   HIGH_P is false, the high halves when HIGH_P is true.
   NOTE(review): excerpt is elided; intervening lines are not visible.  */
49584 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
49586 struct expand_vec_perm_d d;
49587 unsigned i, nelt, base;
49593 d.vmode = GET_MODE (targ);
49594 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49595 d.one_operand_p = false;
49596 d.testing_p = false;
/* BASE selects which half of each operand is interleaved.  */
49598 base = high_p ? nelt / 2 : 0;
/* perm = { base, base+nelt, base+1, base+nelt+1, ... }  */
49599 for (i = 0; i < nelt / 2; ++i)
49601 d.perm[i * 2] = i + base;
49602 d.perm[i * 2 + 1] = i + base + nelt;
49605 /* Note that for AVX this isn't one instruction. */
49606 ok = ix86_expand_vec_perm_const_1 (&d);
49611 /* Expand a vector operation CODE for a V*QImode in terms of the
49612 same operation on V*HImode. */
/* DEST receives the result; OP1 and OP2 are the QImode vector operands.
   Strategy: widen bytes to words (interleave or sse_unpack), perform CODE
   in the wider mode twice (low/high halves), then permute the byte results
   back into DEST.
   NOTE(review): this excerpt is elided -- switch labels, braces and some
   statements between the visible lines are missing from this view.  */
49615 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
49617 machine_mode qimode = GET_MODE (dest);
49618 machine_mode himode;
49619 rtx (*gen_il) (rtx, rtx, rtx);
49620 rtx (*gen_ih) (rtx, rtx, rtx);
49621 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
49622 struct expand_vec_perm_d d;
49623 bool ok, full_interleave;
49624 bool uns_p = false;
/* Pick low/high interleave generators for the byte mode
   (V16QI / V32QI / V64QI variants visible below).  */
49631 gen_il = gen_vec_interleave_lowv16qi;
49632 gen_ih = gen_vec_interleave_highv16qi;
49635 himode = V16HImode;
49636 gen_il = gen_avx2_interleave_lowv32qi;
49637 gen_ih = gen_avx2_interleave_highv32qi;
49640 himode = V32HImode;
49641 gen_il = gen_avx512bw_interleave_lowv64qi;
49642 gen_ih = gen_avx512bw_interleave_highv64qi;
49645 gcc_unreachable ();
/* Default: use OP2 unmodified for both halves (case structure elided).  */
49648 op2_l = op2_h = op2;
49652 /* Unpack data such that we've got a source byte in each low byte of
49653 each word. We don't care what goes into the high byte of each word.
49654 Rather than trying to get zero in there, most convenient is to let
49655 it be a copy of the low byte. */
49656 op2_l = gen_reg_rtx (qimode);
49657 op2_h = gen_reg_rtx (qimode);
49658 emit_insn (gen_il (op2_l, op2, op2));
49659 emit_insn (gen_ih (op2_h, op2, op2));
49662 op1_l = gen_reg_rtx (qimode);
49663 op1_h = gen_reg_rtx (qimode);
49664 emit_insn (gen_il (op1_l, op1, op1));
49665 emit_insn (gen_ih (op1_h, op1, op1));
49666 full_interleave = qimode == V16QImode;
/* Alternative widening path: sign/zero-extend via sse_unpack.  */
49674 op1_l = gen_reg_rtx (himode);
49675 op1_h = gen_reg_rtx (himode);
49676 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
49677 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
49678 full_interleave = true;
49681 gcc_unreachable ();
49684 /* Perform the operation. */
49685 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
49687 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
49689 gcc_assert (res_l && res_h);
49691 /* Merge the data back into the right place. */
49693 d.op0 = gen_lowpart (qimode, res_l);
49694 d.op1 = gen_lowpart (qimode, res_h);
49696 d.nelt = GET_MODE_NUNITS (qimode);
49697 d.one_operand_p = false;
49698 d.testing_p = false;
49700 if (full_interleave)
49702 /* For SSE2, we used an full interleave, so the desired
49703 results are in the even elements. */
49704 for (i = 0; i < 64; ++i)
49709 /* For AVX, the interleave used above was not cross-lane. So the
49710 extraction is evens but with the second and third quarter swapped.
49711 Happily, that is even one insn shorter than even extraction. */
49712 for (i = 0; i < 64; ++i)
49713 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
49716 ok = ix86_expand_vec_perm_const_1 (&d);
/* Attach a REG_EQUAL note describing the whole byte-mode operation.  */
49719 set_unique_reg_note (get_last_insn (), REG_EQUAL,
49720 gen_rtx_fmt_ee (code, qimode, op1, op2));
49723 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
49724 if op is CONST_VECTOR with all odd elements equal to their
49725 preceding element. */
/* NOTE(review): excerpt is elided -- return statements and braces
   between the visible lines are not shown.  */
49728 const_vector_equal_evenodd_p (rtx op)
49730 machine_mode mode = GET_MODE (op);
49731 int i, nunits = GET_MODE_NUNITS (mode);
/* Only a CONST_VECTOR with the full element count qualifies.  */
49732 if (GET_CODE (op) != CONST_VECTOR
49733 || nunits != CONST_VECTOR_NUNITS (op))
/* Compare each even element with its odd successor.  */
49735 for (i = 0; i < nunits; i += 2)
49736 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
/* Expand a widening multiply of the even (ODD_P false) or odd (ODD_P true)
   SImode elements of OP1 and OP2 into the wider-mode DEST.  UNS_P selects
   unsigned vs. signed multiplication.
   NOTE(review): excerpt is elided -- braces, returns and some statements
   between the visible lines are missing from this view.  */
49742 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
49743 bool uns_p, bool odd_p)
49745 machine_mode mode = GET_MODE (op1);
49746 machine_mode wmode = GET_MODE (dest);
49748 rtx orig_op1 = op1, orig_op2 = op2;
49750 if (!nonimmediate_operand (op1, mode))
49751 op1 = force_reg (mode, op1);
49752 if (!nonimmediate_operand (op2, mode))
49753 op2 = force_reg (mode, op2);
49755 /* We only play even/odd games with vectors of SImode. */
49756 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
49758 /* If we're looking for the odd results, shift those members down to
49759 the even slots. For some cpus this is faster than a PSHUFD. */
49762 /* For XOP use vpmacsdqh, but only for smult, as it is only
49764 if (TARGET_XOP && mode == V4SImode && !uns_p)
49766 x = force_reg (wmode, CONST0_RTX (wmode));
49767 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
/* Shift odd elements down by the element bit size; skip operands whose
   even/odd element pairs are already equal (constant vectors).  */
49771 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
49772 if (!const_vector_equal_evenodd_p (orig_op1))
49773 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
49774 x, NULL, 1, OPTAB_DIRECT);
49775 if (!const_vector_equal_evenodd_p (orig_op2))
49776 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
49777 x, NULL, 1, OPTAB_DIRECT);
49778 op1 = gen_lowpart (mode, op1);
49779 op2 = gen_lowpart (mode, op2);
/* Dispatch on vector width to the named even-multiply patterns.  */
49782 if (mode == V16SImode)
49785 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
49787 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
49789 else if (mode == V8SImode)
49792 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
49794 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
49797 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
49798 else if (TARGET_SSE4_1)
49799 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
49802 rtx s1, s2, t0, t1, t2;
49804 /* The easiest way to implement this without PMULDQ is to go through
49805 the motions as if we are performing a full 64-bit multiply. With
49806 the exception that we need to do less shuffling of the elements. */
49808 /* Compute the sign-extension, aka highparts, of the two operands. */
49809 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49810 op1, pc_rtx, pc_rtx);
49811 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
49812 op2, pc_rtx, pc_rtx);
49814 /* Multiply LO(A) * HI(B), and vice-versa. */
49815 t1 = gen_reg_rtx (wmode);
49816 t2 = gen_reg_rtx (wmode);
49817 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
49818 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
49820 /* Multiply LO(A) * LO(B). */
49821 t0 = gen_reg_rtx (wmode);
49822 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
49824 /* Combine and shift the highparts into place. */
49825 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
49826 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
49829 /* Combine high and low parts. */
49830 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
/* Expand a widening multiply of the low (HIGH_P false) or high (HIGH_P
   true) halves of OP1 and OP2 into DEST.  UNS_P selects unsigned vs.
   signed.  Several strategies are used depending on the vector mode and
   ISA (XOP pshufd, AVX2 cross-lane permute, highpart-multiply+interleave,
   sse_unpack+MULT).
   NOTE(review): excerpt is elided -- the switch structure, braces and
   some statements between the visible lines are missing.  */
49837 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
49838 bool uns_p, bool high_p)
49840 machine_mode wmode = GET_MODE (dest);
49841 machine_mode mode = GET_MODE (op1);
49842 rtx t1, t2, t3, t4, mask;
49847 t1 = gen_reg_rtx (mode);
49848 t2 = gen_reg_rtx (mode);
49849 if (TARGET_XOP && !uns_p)
49851 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
49852 shuffle the elements once so that all elements are in the right
49853 place for immediate use: { A C B D }. */
49854 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
49855 const1_rtx, GEN_INT (3)));
49856 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
49857 const1_rtx, GEN_INT (3)));
49861 /* Put the elements into place for the multiply. */
49862 ix86_expand_vec_interleave (t1, op1, op1, high_p);
49863 ix86_expand_vec_interleave (t2, op2, op2, high_p);
49866 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
49870 /* Shuffle the elements between the lanes. After this we
49871 have { A B E F | C D G H } for each operand. */
49872 t1 = gen_reg_rtx (V4DImode);
49873 t2 = gen_reg_rtx (V4DImode);
49874 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
49875 const0_rtx, const2_rtx,
49876 const1_rtx, GEN_INT (3)));
49877 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
49878 const0_rtx, const2_rtx,
49879 const1_rtx, GEN_INT (3)));
49881 /* Shuffle the elements within the lanes. After this we
49882 have { A A B B | C C D D } or { E E F F | G G H H }. */
49883 t3 = gen_reg_rtx (V8SImode);
49884 t4 = gen_reg_rtx (V8SImode);
/* MASK is a pshufd immediate duplicating element pairs; which pair
   depends on HIGH_P.  */
49885 mask = GEN_INT (high_p
49886 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
49887 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
49888 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
49889 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
49891 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
/* Alternative path: compute low and high products separately and
   interleave them.  */
49896 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
49897 uns_p, OPTAB_DIRECT);
49898 t2 = expand_binop (mode,
49899 uns_p ? umul_highpart_optab : smul_highpart_optab,
49900 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
49901 gcc_assert (t1 && t2);
49903 t3 = gen_reg_rtx (mode);
49904 ix86_expand_vec_interleave (t3, t1, t2, high_p);
49905 emit_move_insn (dest, gen_lowpart (wmode, t3));
/* Alternative path: widen the operands and emit a plain MULT.  */
49913 t1 = gen_reg_rtx (wmode);
49914 t2 = gen_reg_rtx (wmode);
49915 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
49916 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
49918 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
49922 gcc_unreachable ();
/* Expand a V4SImode multiply OP0 = OP1 * OP2 using SSE2 only: two
   even/odd widening multiplies, pshufd compaction, then an interleave.
   NOTE(review): excerpt is elided; some lines are not visible here.  */
49927 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
49929 rtx res_1, res_2, res_3, res_4;
49931 res_1 = gen_reg_rtx (V4SImode);
49932 res_2 = gen_reg_rtx (V4SImode);
49933 res_3 = gen_reg_rtx (V2DImode);
49934 res_4 = gen_reg_rtx (V2DImode);
/* Unsigned widening multiplies of even then odd elements.  */
49935 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
49936 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
49938 /* Move the results in element 2 down to element 1; we don't care
49939 what goes in elements 2 and 3. Then we can merge the parts
49940 back together with an interleave.
49942 Note that two other sequences were tried:
49943 (1) Use interleaves at the start instead of psrldq, which allows
49944 us to use a single shufps to merge things back at the end.
49945 (2) Use shufps here to combine the two vectors, then pshufd to
49946 put the elements in the correct order.
49947 In both cases the cost of the reformatting stall was too high
49948 and the overall sequence slower. */
49950 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
49951 const0_rtx, const2_rtx,
49952 const0_rtx, const0_rtx));
49953 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
49954 const0_rtx, const2_rtx,
49955 const0_rtx, const0_rtx));
49956 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
/* Record the full multiply as a REG_EQUAL note on the final insn.  */
49958 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
/* Expand a V*DImode multiply OP0 = OP1 * OP2.  Uses a single insn where
   AVX512DQ (+VL) provides one, an XOP phadddq sequence for V2DImode,
   and otherwise the classic umult-even / shift / add decomposition.
   NOTE(review): excerpt is elided -- braces and some statements between
   the visible lines are missing from this view.  */
49962 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
49964 machine_mode mode = GET_MODE (op0);
49965 rtx t1, t2, t3, t4, t5, t6;
49967 if (TARGET_AVX512DQ && mode == V8DImode)
49968 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
49969 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
49970 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
49971 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
49972 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2))
49973 else if (TARGET_XOP && mode == V2DImode)
49975 /* op1: A,B,C,D, op2: E,F,G,H */
49976 op1 = gen_lowpart (V4SImode, op1);
49977 op2 = gen_lowpart (V4SImode, op2);
49979 t1 = gen_reg_rtx (V4SImode);
49980 t2 = gen_reg_rtx (V4SImode);
49981 t3 = gen_reg_rtx (V2DImode);
49982 t4 = gen_reg_rtx (V2DImode);
/* t1: swapped SImode halves of op1 (pshufd operands elided).  */
49985 emit_insn (gen_sse2_pshufd_1 (t1, op1,
49991 /* t2: (B*E),(A*F),(D*G),(C*H) */
49992 emit_insn (gen_mulv4si3 (t2, t1, op2));
49994 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
49995 emit_insn (gen_xop_phadddq (t3, t2));
49997 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
49998 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
50000 /* Multiply lower parts and add all */
50001 t5 = gen_reg_rtx (V2DImode);
50002 emit_insn (gen_vec_widen_umult_even_v4si (t5,
50003 gen_lowpart (V4SImode, op1),
50004 gen_lowpart (V4SImode, op2)));
50005 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
/* Generic SSE2/AVX2/AVX512F path below.  */
50010 machine_mode nmode;
50011 rtx (*umul) (rtx, rtx, rtx);
50013 if (mode == V2DImode)
50015 umul = gen_vec_widen_umult_even_v4si;
50018 else if (mode == V4DImode)
50020 umul = gen_vec_widen_umult_even_v8si;
50023 else if (mode == V8DImode)
50025 umul = gen_vec_widen_umult_even_v16si;
50029 gcc_unreachable ();
50032 /* Multiply low parts. */
50033 t1 = gen_reg_rtx (mode);
50034 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
50036 /* Shift input vectors right 32 bits so we can multiply high parts. */
50038 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
50039 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
50041 /* Multiply high parts by low parts. */
50042 t4 = gen_reg_rtx (mode);
50043 t5 = gen_reg_rtx (mode);
50044 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
50045 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
50047 /* Combine and shift the highparts back. */
50048 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
50049 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
50051 /* Combine high and low parts. */
50052 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
/* Record the full multiply as a REG_EQUAL note.  */
50055 set_unique_reg_note (get_last_insn (), REG_EQUAL,
50056 gen_rtx_MULT (mode, op1, op2));
50059 /* Return 1 if control transfer instruction INSN
50060 should be encoded with bnd prefix.
50061 If insn is NULL then return 1 when control
50062 transfer instructions should be prefixed with
50063 bnd by default for current function. */
/* NOTE(review): excerpt is elided; braces between visible lines
   are not shown.  */
50066 ix86_bnd_prefixed_insn_p (rtx insn)
50068 /* For call insns check special flag. */
50069 if (insn && CALL_P (insn))
50071 rtx call = get_call_rtx_from (insn);
50073 return CALL_EXPR_WITH_BOUNDS_P (call);
50076 /* All other insns are prefixed only if function is instrumented. */
50077 return chkp_function_instrumented_p (current_function_decl);
50080 /* Calculate integer abs() using only SSE2 instructions. */
/* TARGET receives |INPUT|; strategy depends on the element width
   (32-bit: shift/xor/sub, 16-bit: smax, 8-bit: umin).
   NOTE(review): excerpt is elided -- the switch on the mode and some
   braces between visible lines are not shown.  */
50083 ix86_expand_sse2_abs (rtx target, rtx input)
50085 machine_mode mode = GET_MODE (target);
50090 /* For 32-bit signed integer X, the best way to calculate the absolute
50091 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
50093 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
50094 GEN_INT (GET_MODE_BITSIZE
50095 (GET_MODE_INNER (mode)) - 1),
50096 NULL, 0, OPTAB_DIRECT);
50097 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
50098 NULL, 0, OPTAB_DIRECT);
50099 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
50100 target, 0, OPTAB_DIRECT);
50103 /* For 16-bit signed integer X, the best way to calculate the absolute
50104 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
50106 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50108 x = expand_simple_binop (mode, SMAX, tmp0, input,
50109 target, 0, OPTAB_DIRECT);
50112 /* For 8-bit signed integer X, the best way to calculate the absolute
50113 value of X is min ((unsigned char) X, (unsigned char) (-X)),
50114 as SSE2 provides the PMINUB insn. */
50116 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50118 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
50119 target, 0, OPTAB_DIRECT);
50123 gcc_unreachable ();
/* Copy the result if it did not land in TARGET directly.  */
50127 emit_move_insn (target, x);
50130 /* Expand an insert into a vector register through pinsr insn.
50131 Return true if successful. */
/* OPERANDS: [0] destination, [1] bit size of the field, [2] bit
   position, [3] source value.
   NOTE(review): excerpt is elided -- switch labels, returns and braces
   between the visible lines are not shown.  */
50134 ix86_expand_pinsr (rtx *operands)
50136 rtx dst = operands[0];
50137 rtx src = operands[3];
50139 unsigned int size = INTVAL (operands[1]);
50140 unsigned int pos = INTVAL (operands[2]);
/* Strip a SUBREG on the destination, folding its byte offset into POS.  */
50142 if (GET_CODE (dst) == SUBREG)
50144 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
50145 dst = SUBREG_REG (dst);
50148 if (GET_CODE (src) == SUBREG)
50149 src = SUBREG_REG (src);
50151 switch (GET_MODE (dst))
50158 machine_mode srcmode, dstmode;
50159 rtx (*pinsr)(rtx, rtx, rtx, rtx);
50161 srcmode = mode_for_size (size, MODE_INT, 0);
/* Pick the pinsr variant matching the field size; byte/dword/qword
   inserts require SSE4.1, word inserts only SSE2.  */
50166 if (!TARGET_SSE4_1)
50168 dstmode = V16QImode;
50169 pinsr = gen_sse4_1_pinsrb;
50175 dstmode = V8HImode;
50176 pinsr = gen_sse2_pinsrw;
50180 if (!TARGET_SSE4_1)
50182 dstmode = V4SImode;
50183 pinsr = gen_sse4_1_pinsrd;
50187 gcc_assert (TARGET_64BIT);
50188 if (!TARGET_SSE4_1)
50190 dstmode = V2DImode;
50191 pinsr = gen_sse4_1_pinsrq;
/* Use a scratch register when the destination mode differs.  */
50199 if (GET_MODE (dst) != dstmode)
50200 d = gen_reg_rtx (dstmode);
50201 src = gen_lowpart (srcmode, src);
/* POS here indexes elements; it has been scaled from bits (scaling
   lines elided in this view).  */
50205 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
50206 GEN_INT (1 << pos)));
50208 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
50217 /* This function returns the calling abi specific va_list type node.
50218 It returns the FNDECL specific va_list type. */
/* NOTE(review): excerpt is elided; the condition guarding the first
   return (presumably !TARGET_64BIT) is not visible here.  */
50221 ix86_fn_abi_va_list (tree fndecl)
50224 return va_list_type_node;
50225 gcc_assert (fndecl != NULL_TREE);
/* MS ABI functions use the MS va_list; everything else the SysV one.  */
50227 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
50228 return ms_va_list_type_node;
50230 return sysv_va_list_type_node;
50233 /* Returns the canonical va_list type specified by TYPE. If there
50234 is no valid TYPE provided, it return NULL_TREE. */
/* Compares TYPE against va_list_type_node, sysv_va_list_type_node and
   ms_va_list_type_node in turn, unwrapping array decay as needed.
   NOTE(review): excerpt is elided -- the declaration of HTYPE and
   several braces between visible lines are not shown.  */
50237 ix86_canonical_va_list_type (tree type)
50241 /* Resolve references and pointers to va_list type. */
50242 if (TREE_CODE (type) == MEM_REF)
50243 type = TREE_TYPE (type);
50244 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
50245 type = TREE_TYPE (type);
50246 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
50247 type = TREE_TYPE (type);
50249 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
50251 wtype = va_list_type_node;
50252 gcc_assert (wtype != NULL_TREE);
50254 if (TREE_CODE (wtype) == ARRAY_TYPE)
50256 /* If va_list is an array type, the argument may have decayed
50257 to a pointer type, e.g. by being passed to another function.
50258 In that case, unwrap both types so that we can compare the
50259 underlying records. */
50260 if (TREE_CODE (htype) == ARRAY_TYPE
50261 || POINTER_TYPE_P (htype))
50263 wtype = TREE_TYPE (wtype);
50264 htype = TREE_TYPE (htype);
50267 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50268 return va_list_type_node;
/* Second candidate: the SysV va_list.  */
50269 wtype = sysv_va_list_type_node;
50270 gcc_assert (wtype != NULL_TREE);
50272 if (TREE_CODE (wtype) == ARRAY_TYPE)
50274 /* If va_list is an array type, the argument may have decayed
50275 to a pointer type, e.g. by being passed to another function.
50276 In that case, unwrap both types so that we can compare the
50277 underlying records. */
50278 if (TREE_CODE (htype) == ARRAY_TYPE
50279 || POINTER_TYPE_P (htype))
50281 wtype = TREE_TYPE (wtype);
50282 htype = TREE_TYPE (htype);
50285 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50286 return sysv_va_list_type_node;
/* Third candidate: the MS va_list.  */
50287 wtype = ms_va_list_type_node;
50288 gcc_assert (wtype != NULL_TREE);
50290 if (TREE_CODE (wtype) == ARRAY_TYPE)
50292 /* If va_list is an array type, the argument may have decayed
50293 to a pointer type, e.g. by being passed to another function.
50294 In that case, unwrap both types so that we can compare the
50295 underlying records. */
50296 if (TREE_CODE (htype) == ARRAY_TYPE
50297 || POINTER_TYPE_P (htype))
50299 wtype = TREE_TYPE (wtype);
50300 htype = TREE_TYPE (htype);
50303 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50304 return ms_va_list_type_node;
/* No target-specific match: fall back to the generic handler.  */
50307 return std_canonical_va_list_type (type);
50310 /* Iterate through the target-specific builtin types for va_list.
50311 IDX denotes the iterator, *PTREE is set to the result type of
50312 the va_list builtin, and *PNAME to its internal type.
50313 Returns zero if there is no element for this index, otherwise
50314 IDX should be increased upon the next call.
50315 Note, do not iterate a base builtin's name like __builtin_va_list.
50316 Used from c_common_nodes_and_builtins. */
/* NOTE(review): excerpt is elided -- the switch on IDX and the return
   statements between the visible lines are not shown.  */
50319 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
50329 *ptree = ms_va_list_type_node;
50330 *pname = "__builtin_ms_va_list";
50334 *ptree = sysv_va_list_type_node;
50335 *pname = "__builtin_sysv_va_list";
50343 #undef TARGET_SCHED_DISPATCH
50344 #define TARGET_SCHED_DISPATCH has_dispatch
50345 #undef TARGET_SCHED_DISPATCH_DO
50346 #define TARGET_SCHED_DISPATCH_DO do_dispatch
50347 #undef TARGET_SCHED_REASSOCIATION_WIDTH
50348 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
50349 #undef TARGET_SCHED_REORDER
50350 #define TARGET_SCHED_REORDER ix86_sched_reorder
50351 #undef TARGET_SCHED_ADJUST_PRIORITY
50352 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
50353 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
50354 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
50355 ix86_dependencies_evaluation_hook
50357 /* The size of the dispatch window is the total number of bytes of
50358 object code allowed in a window. */
50359 #define DISPATCH_WINDOW_SIZE 16
50361 /* Number of dispatch windows considered for scheduling. */
50362 #define MAX_DISPATCH_WINDOWS 3
50364 /* Maximum number of instructions in a window. */
50367 /* Maximum number of immediate operands in a window. */
50370 /* Maximum number of immediate bits allowed in a window. */
50371 #define MAX_IMM_SIZE 128
50373 /* Maximum number of 32 bit immediates allowed in a window. */
50374 #define MAX_IMM_32 4
50376 /* Maximum number of 64 bit immediates allowed in a window. */
50377 #define MAX_IMM_64 2
50379 /* Maximum total of loads or prefetches allowed in a window. */
50382 /* Maximum total of stores allowed in a window. */
50383 #define MAX_STORE 1
50389 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
50390 enum dispatch_group {
50405 /* Number of allowable groups in a dispatch window. It is an array
50406 indexed by dispatch_group enum. 100 is used as a big number,
50407 because the number of these kind of operations does not have any
50408 effect in dispatch window, but we need them for other reasons in
50410 static unsigned int num_allowable_groups[disp_last] = {
50411 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
50414 char group_name[disp_last + 1][16] = {
50415 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
50416 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
50417 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
50420 /* Instruction path. */
50423 path_single, /* Single micro op. */
50424 path_double, /* Double micro op. */
50425 path_multi, /* Instructions with more than 2 micro ops. */
50429 /* sched_insn_info defines a window to the instructions scheduled in
50430 the basic block. It contains a pointer to the insn_info table and
50431 the instruction scheduled.
50433 Windows are allocated for each basic block and are linked
50435 typedef struct sched_insn_info_s {
50437 enum dispatch_group group;
50438 enum insn_path path;
50443 /* Linked list of dispatch windows. This is a two way list of
50444 dispatch windows of a basic block. It contains information about
50445 the number of uops in the window and the total number of
50446 instructions and of bytes in the object code for this dispatch
50448 typedef struct dispatch_windows_s {
50449 int num_insn; /* Number of insn in the window. */
50450 int num_uops; /* Number of uops in the window. */
50451 int window_size; /* Number of bytes in the window. */
50452 int window_num; /* Window number between 0 or 1. */
50453 int num_imm; /* Number of immediates in an insn. */
50454 int num_imm_32; /* Number of 32 bit immediates in an insn. */
50455 int num_imm_64; /* Number of 64 bit immediates in an insn. */
50456 int imm_size; /* Total immediates in the window. */
50457 int num_loads; /* Total memory loads in the window. */
50458 int num_stores; /* Total memory stores in the window. */
50459 int violation; /* Violation exists in window. */
50460 sched_insn_info *window; /* Pointer to the window. */
50461 struct dispatch_windows_s *next;
50462 struct dispatch_windows_s *prev;
50463 } dispatch_windows;
50465 /* Immediate values used in an insn. */
50466 typedef struct imm_info_s
50473 static dispatch_windows *dispatch_window_list;
50474 static dispatch_windows *dispatch_window_list1;
50476 /* Get dispatch group of insn. */
/* Classify INSN by its memory attribute: disp_store, disp_load,
   disp_load_store, or disp_no_group for everything else.
   NOTE(review): excerpt is elided; the returns for the store/load
   branches are not visible here.  */
50478 static enum dispatch_group
50479 get_mem_group (rtx_insn *insn)
50481 enum attr_memory memory;
/* Unrecognized insns carry no memory attribute.  */
50483 if (INSN_CODE (insn) < 0)
50484 return disp_no_group;
50485 memory = get_attr_memory (insn);
50486 if (memory == MEMORY_STORE)
50489 if (memory == MEMORY_LOAD)
50492 if (memory == MEMORY_BOTH)
50493 return disp_load_store;
50495 return disp_no_group;
50498 /* Return true if insn is a compare instruction. */
50501 is_cmp (rtx_insn *insn)
50503 enum attr_type type;
50505 type = get_attr_type (insn);
50506 return (type == TYPE_TEST
50507 || type == TYPE_ICMP
50508 || type == TYPE_FCMP
50509 || GET_CODE (PATTERN (insn)) == COMPARE);
50512 /* Return true if a dispatch violation encountered. */
/* Reads the violation flag of the last window in the (at most two
   element) dispatch window list.  */
50515 dispatch_violation (void)
50517 if (dispatch_window_list->next)
50518 return dispatch_window_list->next->violation;
50519 return dispatch_window_list->violation;
50522 /* Return true if insn is a branch instruction. */
50525 is_branch (rtx insn)
50527 return (CALL_P (insn) || JUMP_P (insn));
50530 /* Return true if insn is a prefetch instruction. */
50533 is_prefetch (rtx insn)
50535 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
50538 /* This function initializes a dispatch window and the list container holding a
50539 pointer to the window. */
/* WINDOW_NUM (0 or 1) selects which of the two global window lists to
   reset; all counters are cleared and each slot of the window array is
   reset to an empty state.  */
50542 init_window (int window_num)
50545 dispatch_windows *new_list;
50547 if (window_num == 0)
50548 new_list = dispatch_window_list;
50550 new_list = dispatch_window_list1;
50552 new_list->num_insn = 0;
50553 new_list->num_uops = 0;
50554 new_list->window_size = 0;
50555 new_list->next = NULL;
50556 new_list->prev = NULL;
50557 new_list->window_num = window_num;
50558 new_list->num_imm = 0;
50559 new_list->num_imm_32 = 0;
50560 new_list->num_imm_64 = 0;
50561 new_list->imm_size = 0;
50562 new_list->num_loads = 0;
50563 new_list->num_stores = 0;
50564 new_list->violation = false;
/* Clear every instruction slot in the window.  */
50566 for (i = 0; i < MAX_INSN; i++)
50568 new_list->window[i].insn = NULL;
50569 new_list->window[i].group = disp_no_group;
50570 new_list->window[i].path = no_path;
50571 new_list->window[i].byte_len = 0;
50572 new_list->window[i].imm_bytes = 0;
50577 /* This function allocates and initializes a dispatch window and the
50578 list container holding a pointer to the window. */
50580 static dispatch_windows *
50581 allocate_window (void)
50583 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
/* One extra slot beyond MAX_INSN is allocated for the window array.  */
50584 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
50589 /* This routine initializes the dispatch scheduling information. It
50590 initiates building dispatch scheduler tables and constructs the
50591 first dispatch window. */
50594 init_dispatch_sched (void)
50596 /* Allocate a dispatch list and a window. */
50597 dispatch_window_list = allocate_window ();
50598 dispatch_window_list1 = allocate_window ();
50603 /* This function returns true if a branch is detected. End of a basic block
50604 does not have to be a branch, but here we assume only branches end a
50608 is_end_basic_block (enum dispatch_group group)
50610 return group == disp_branch;
50613 /* This function is called when the end of a window processing is reached. */
/* Sanity-checks the window sizes (two windows together hold at most
   48 bytes of instructions) before processing ends.  */
50616 process_end_window (void)
50618 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
50619 if (dispatch_window_list->next)
50621 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
50622 gcc_assert (dispatch_window_list->window_size
50623 + dispatch_window_list1->window_size <= 48);
50629 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
50630 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
50631 for 48 bytes of instructions. Note that these windows are not dispatch
50632 windows that their sizes are DISPATCH_WINDOW_SIZE. */
/* NOTE(review): excerpt is elided; the branch bodies between visible
   lines are not fully shown.  */
50634 static dispatch_windows *
50635 allocate_next_window (int window_num)
50637 if (window_num == 0)
50639 if (dispatch_window_list->next)
50642 return dispatch_window_list;
/* window_num == 1: link the second window after the first.  */
50645 dispatch_window_list->next = dispatch_window_list1;
50646 dispatch_window_list1->prev = dispatch_window_list;
50648 return dispatch_window_list1;
50651 /* Compute number of immediate operands of an instruction. */
/* Walks every sub-rtx of IN_RTX and bumps the counters in IMM_VALUES:
   total immediates, 32-bit-representable ones, and 64-bit ones.
   NOTE(review): excerpt is elided -- the case labels of the switch
   (CONST_INT / CONST_DOUBLE / CODE_LABEL, presumably) are not shown.  */
50654 find_constant (rtx in_rtx, imm_info *imm_values)
50656 if (INSN_P (in_rtx))
50657 in_rtx = PATTERN (in_rtx);
50658 subrtx_iterator::array_type array;
50659 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
50660 if (const_rtx x = *iter)
50661 switch (GET_CODE (x))
/* Integer constant: count it, and classify as 32- or 64-bit.  */
50666 (imm_values->imm)++;
50667 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
50668 (imm_values->imm32)++;
50670 (imm_values->imm64)++;
/* This constant kind always needs a 64-bit immediate.  */
50674 (imm_values->imm)++;
50675 (imm_values->imm64)++;
/* Normal code labels count as 32-bit immediates.  */
50679 if (LABEL_KIND (x) == LABEL_NORMAL)
50681 (imm_values->imm)++;
50682 (imm_values->imm32)++;
50691 /* Return total size of immediate operands of an instruction along with number
50692 of corresponding immediate-operands. It initializes its parameters to zero
50693 before calling FIND_CONSTANT.
50694 INSN is the input instruction. IMM is the total of immediates.
50695 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
50699 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
50701 imm_info imm_values = {0, 0, 0};
50703 find_constant (insn, &imm_values);
50704 *imm = imm_values.imm;
50705 *imm32 = imm_values.imm32;
50706 *imm64 = imm_values.imm64;
/* Size in bytes: 4 per 32-bit immediate, 8 per 64-bit immediate.  */
50707 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
50710 /* This function indicates if an operand of an instruction is an
/* Returns nonzero when INSN has at least one immediate operand; the
   individual counts are discarded.  */
50714 has_immediate (rtx insn)
50716 int num_imm_operand;
50717 int num_imm32_operand;
50718 int num_imm64_operand;
50721 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50722 &num_imm64_operand);
50726 /* Return single or double path for instructions. */
/* Maps the amdfam10_decode attribute onto the insn_path enum:
   0 -> path_single, 1 -> path_double, otherwise (elided) path_multi.  */
50728 static enum insn_path
50729 get_insn_path (rtx_insn *insn)
50731 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
50733 if ((int)path == 0)
50734 return path_single;
50736 if ((int)path == 1)
50737 return path_double;
50742 /* Return insn dispatch group. */
/* Classifies INSN for dispatch: memory group first, then branch,
   immediate, or prefetch; disp_no_group as fallback.
   NOTE(review): excerpt is elided; some early returns are not shown.  */
50744 static enum dispatch_group
50745 get_insn_group (rtx_insn *insn)
50747 enum dispatch_group group = get_mem_group (insn);
50751 if (is_branch (insn))
50752 return disp_branch;
50757 if (has_immediate (insn))
50760 if (is_prefetch (insn))
50761 return disp_prefetch;
50763 return disp_no_group;
50766 /* Count number of GROUP restricted instructions in a dispatch
50767 window WINDOW_LIST. */
/* Checks whether adding INSN would exceed the per-window immediate and
   load/store budgets (MAX_IMM*, MAX_LOAD, MAX_STORE).
   NOTE(review): excerpt is elided -- the return values of the two big
   conditionals are not visible here.  */
50770 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
50772 enum dispatch_group group = get_insn_group (insn);
50774 int num_imm_operand;
50775 int num_imm32_operand;
50776 int num_imm64_operand;
50778 if (group == disp_no_group)
50781 if (group == disp_imm)
50783 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50784 &num_imm64_operand);
/* Reject when any immediate budget of the window would overflow:
   total byte size, operand count, or the 32/64-bit sub-limits (a
   64-bit immediate consumes two 32-bit slots and vice versa).  */
50785 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
50786 || num_imm_operand + window_list->num_imm > MAX_IMM
50787 || (num_imm32_operand > 0
50788 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
50789 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
50790 || (num_imm64_operand > 0
50791 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
50792 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
50793 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
50794 && num_imm64_operand > 0
50795 && ((window_list->num_imm_64 > 0
50796 && window_list->num_insn >= 2)
50797 || window_list->num_insn >= 3)))
/* Memory groups: enforce the load and store caps.  */
50803 if ((group == disp_load_store
50804 && (window_list->num_loads >= MAX_LOAD
50805 || window_list->num_stores >= MAX_STORE))
50806 || ((group == disp_load
50807 || group == disp_prefetch)
50808 && window_list->num_loads >= MAX_LOAD)
50809 || (group == disp_store
50810 && window_list->num_stores >= MAX_STORE))
/* NOTE(review): sampled listing -- return type, several returns and
   braces are missing.  Visible flow: disp_jcc/disp_cmp and
   non-restricted groups are handled first; then the last window is
   selected, a 48-byte fullness check is applied for window 1, the
   group's restricted count is compared against num_allowable_groups,
   and window 0 additionally checks uop capacity for double-path
   insns.  Exact return values per branch: confirm in full source.  */
50816 /* This function returns true if insn satisfies dispatch rules on the
50817 last window scheduled. */
50820 fits_dispatch_window (rtx_insn *insn)
50822 dispatch_windows *window_list = dispatch_window_list;
50823 dispatch_windows *window_list_next = dispatch_window_list->next;
50824 unsigned int num_restrict;
50825 enum dispatch_group group = get_insn_group (insn);
50826 enum insn_path path = get_insn_path (insn);
50829 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
50830 instructions should be given the lowest priority in the
50831 scheduling process in Haifa scheduler to make sure they will be
50832 scheduled in the same dispatch window as the reference to them. */
50833 if (group == disp_jcc || group == disp_cmp)
50836 /* Check nonrestricted. */
50837 if (group == disp_no_group || group == disp_branch)
50840 /* Get last dispatch window. */
50841 if (window_list_next)
50842 window_list = window_list_next;
50844 if (window_list->window_num == 1)
50846 sum = window_list->prev->window_size + window_list->window_size;
50849 || (min_insn_size (insn) + sum) >= 48)
50850 /* Window 1 is full. Go for next window. */
50854 num_restrict = count_num_restricted (insn, window_list);
50856 if (num_restrict > num_allowable_groups[group])
50859 /* See if it fits in the first window. */
50860 if (window_list->window_num == 0)
50862 /* The first widow should have only single and double path
50864 if (path == path_double
50865 && (window_list->num_uops + 2) > MAX_INSN)
50867 else if (path != path_single)
/* NOTE(review): sampled listing -- return type, braces and a few lines
   missing; code text left byte-identical.  Visible behavior: records a
   dispatch violation if INSN does not fit the window, fills the next
   sched_insn_info slot with INSN's length/group/path/immediate size,
   and bumps the window's aggregate counters (size, insns, uops,
   immediates) plus per-group load/store counts -- note disp_load_store
   counts as both a load and a store.  */
50873 /* Add an instruction INSN with NUM_UOPS micro-operations to the
50874 dispatch window WINDOW_LIST. */
50877 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
50879 int byte_len = min_insn_size (insn);
50880 int num_insn = window_list->num_insn;
50882 sched_insn_info *window = window_list->window;
50883 enum dispatch_group group = get_insn_group (insn);
50884 enum insn_path path = get_insn_path (insn);
50885 int num_imm_operand;
50886 int num_imm32_operand;
50887 int num_imm64_operand;
50889 if (!window_list->violation && group != disp_cmp
50890 && !fits_dispatch_window (insn))
50891 window_list->violation = true;
50893 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
50894 &num_imm64_operand);
50896 /* Initialize window with new instruction. */
50897 window[num_insn].insn = insn;
50898 window[num_insn].byte_len = byte_len;
50899 window[num_insn].group = group;
50900 window[num_insn].path = path;
50901 window[num_insn].imm_bytes = imm_size;
50903 window_list->window_size += byte_len;
50904 window_list->num_insn = num_insn + 1;
50905 window_list->num_uops = window_list->num_uops + num_uops;
50906 window_list->imm_size += imm_size;
50907 window_list->num_imm += num_imm_operand;
50908 window_list->num_imm_32 += num_imm32_operand;
50909 window_list->num_imm_64 += num_imm64_operand;
50911 if (group == disp_store)
50912 window_list->num_stores += 1;
50913 else if (group == disp_load
50914 || group == disp_prefetch)
50915 window_list->num_loads += 1;
50916 else if (group == disp_load_store)
50918 window_list->num_stores += 1;
50919 window_list->num_loads += 1;
/* NOTE(review): sampled listing -- return type, several declarations
   (byte_len, num_insn, num_uops, window_num, insn_fits, sum) and some
   condition lines are missing; code text left byte-identical.  Visible
   behavior: skips insns without a recognized code, derives uop count
   from the insn path, allocates a fresh window when the current one is
   full (window_num = ~window_num & 1 toggles 0 <-> 1), adds the insn,
   and calls process_end_window () when a window fills on a branch, when
   window 1 overflows the 48-byte budget, or at end of basic block.  */
50923 /* Adds a scheduled instruction, INSN, to the current dispatch window.
50924 If the total bytes of instructions or the number of instructions in
50925 the window exceed allowable, it allocates a new window. */
50928 add_to_dispatch_window (rtx_insn *insn)
50931 dispatch_windows *window_list;
50932 dispatch_windows *next_list;
50933 dispatch_windows *window0_list;
50934 enum insn_path path;
50935 enum dispatch_group insn_group;
50943 if (INSN_CODE (insn) < 0)
50946 byte_len = min_insn_size (insn);
50947 window_list = dispatch_window_list;
50948 next_list = window_list->next;
50949 path = get_insn_path (insn);
50950 insn_group = get_insn_group (insn);
50952 /* Get the last dispatch window. */
50954 window_list = dispatch_window_list->next;
50956 if (path == path_single)
50958 else if (path == path_double)
50961 insn_num_uops = (int) path;
50963 /* If current window is full, get a new window.
50964 Window number zero is full, if MAX_INSN uops are scheduled in it.
50965 Window number one is full, if window zero's bytes plus window
50966 one's bytes is 32, or if the bytes of the new instruction added
50967 to the total makes it greater than 48, or it has already MAX_INSN
50968 instructions in it. */
50969 num_insn = window_list->num_insn;
50970 num_uops = window_list->num_uops;
50971 window_num = window_list->window_num;
50972 insn_fits = fits_dispatch_window (insn);
50974 if (num_insn >= MAX_INSN
50975 || num_uops + insn_num_uops > MAX_INSN
50978 window_num = ~window_num & 1;
50979 window_list = allocate_next_window (window_num);
50982 if (window_num == 0)
50984 add_insn_window (insn, window_list, insn_num_uops);
50985 if (window_list->num_insn >= MAX_INSN
50986 && insn_group == disp_branch)
50988 process_end_window ();
50992 else if (window_num == 1)
50994 window0_list = window_list->prev;
50995 sum = window0_list->window_size + window_list->window_size;
50997 || (byte_len + sum) >= 48)
50999 process_end_window ();
51000 window_list = dispatch_window_list;
51003 add_insn_window (insn, window_list, insn_num_uops);
51006 gcc_unreachable ();
51008 if (is_end_basic_block (insn_group))
51010 /* End of basic block is reached do end-basic-block process. */
51011 process_end_window ();
/* NOTE(review): sampled listing -- braces and a couple of lines (e.g.
   the num_stores argument continuation) are missing.  Debug dump of
   one dispatch window: selects dispatch_window_list for window 0,
   dispatch_window_list1 otherwise, prints the aggregate counters, then
   one line per occupied window slot.  */
51016 /* Print the dispatch window, WINDOW_NUM, to FILE. */
51018 DEBUG_FUNCTION static void
51019 debug_dispatch_window_file (FILE *file, int window_num)
51021 dispatch_windows *list;
51024 if (window_num == 0)
51025 list = dispatch_window_list;
51027 list = dispatch_window_list1;
51029 fprintf (file, "Window #%d:\n", list->window_num);
51030 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
51031 list->num_insn, list->num_uops, list->window_size);
51032 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51033 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
51035 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
51037 fprintf (file, " insn info:\n");
51039 for (i = 0; i < MAX_INSN; i++)
51041 if (!list->window[i].insn)
51043 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
51044 i, group_name[list->window[i].group],
51045 i, (void *)list->window[i].insn,
51046 i, list->window[i].path,
51047 i, list->window[i].byte_len,
51048 i, list->window[i].imm_bytes);
/* NOTE(review): sampled listing -- braces missing.  Thin wrapper that
   dumps window WINDOW_NUM to stdout via debug_dispatch_window_file.  */
51052 /* Print to stdout a dispatch window. */
51054 DEBUG_FUNCTION void
51055 debug_dispatch_window (int window_num)
51057 debug_dispatch_window_file (stdout, window_num);
/* NOTE(review): sampled listing -- braces and the declarations of
   byte_len/imm_size are missing.  Prints INSN's dispatch attributes
   (group, path, length, immediate-operand counts/size) to FILE,
   skipping insns without a recognized INSN_CODE.  */
51060 /* Print INSN dispatch information to FILE. */
51062 DEBUG_FUNCTION static void
51063 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
51066 enum insn_path path;
51067 enum dispatch_group group;
51069 int num_imm_operand;
51070 int num_imm32_operand;
51071 int num_imm64_operand;
51073 if (INSN_CODE (insn) < 0)
51076 byte_len = min_insn_size (insn);
51077 path = get_insn_path (insn);
51078 group = get_insn_group (insn);
51079 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51080 &num_imm64_operand);
51082 fprintf (file, " insn info:\n");
51083 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
51084 group_name[group], path, byte_len);
51085 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51086 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
/* NOTE(review): sampled listing -- braces and loop-index declaration
   missing.  Dumps dispatch info for every insn currently on the
   scheduler's ready list to stdout.  */
51089 /* Print to STDERR the status of the ready list with respect to
51090 dispatch windows. */
51092 DEBUG_FUNCTION void
51093 debug_ready_dispatch (void)
51096 int no_ready = number_in_ready ();
51098 fprintf (stdout, "Number of ready: %d\n", no_ready);
51100 for (i = 0; i < no_ready; i++)
51101 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
/* NOTE(review): sampled listing -- return type and braces missing.
   Dispatch-scheduler driver: MODE selects initialization
   (DISPATCH_INIT) or adding INSN to the current window.  */
51104 /* This routine is the driver of the dispatch scheduler. */
51107 do_dispatch (rtx_insn *insn, int mode)
51109 if (mode == DISPATCH_INIT)
51110 init_dispatch_sched ();
51111 else if (mode == ADD_TO_DISPATCH_WINDOW)
51112 add_to_dispatch_window (insn);
/* NOTE(review): sampled listing -- return type, the switch statement
   head, a case label and the final return are missing.  Visible logic:
   dispatch queries are only honored on bdver1..bdver4 with
   -mdispatch-scheduler; ACTION selects among IS_DISPATCH_ON, an
   is_cmp query, DISPATCH_VIOLATION and FITS_DISPATCH_WINDOW.  */
51115 /* Return TRUE if Dispatch Scheduling is supported. */
51118 has_dispatch (rtx_insn *insn, int action)
51120 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
51121 && flag_dispatch_scheduler)
51127 case IS_DISPATCH_ON:
51132 return is_cmp (insn);
51134 case DISPATCH_VIOLATION:
51135 return dispatch_violation ();
51137 case FITS_DISPATCH_WINDOW:
51138 return fits_dispatch_window (insn);
/* NOTE(review): sampled listing -- return type, braces and the actual
   `res = 2` / `return res` lines are missing.  Per the surviving
   header comment, reassociation width is 2 on targets that can issue
   two instructions per cycle; the returned values for each branch must
   be confirmed in the full source.  */
51144 /* Implementation of reassociation_width target hook used by
51145 reassoc phase to identify parallelism level in reassociated
51146 tree. Statements tree_code is passed in OPC. Arguments type
51149 Currently parallel reassociation is enabled for Atom
51150 processors only and we set reassociation width to be 2
51151 because Atom may issue up to 2 instructions per cycle.
51153 Return value should be fixed if parallel reassociation is
51154 enabled for other processors. */
51157 ix86_reassociation_width (unsigned int, machine_mode mode)
51160 if (VECTOR_MODE_P (mode))
51162 if (TARGET_VECTOR_PARALLEL_EXECUTION)
51169 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
51171 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
/* NOTE(review): sampled listing -- the switch head, case labels (QImode
   etc.), braces and several returns are missing.  Visible pattern:
   picks the widest vector mode per element mode, preferring 512-bit
   under AVX512F/BW, 256-bit under AVX unless -mprefer-avx128, else
   128-bit SSE modes; DFmode additionally requires
   TARGET_VECTORIZE_DOUBLE.  */
51177 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
51178 place emms and femms instructions. */
51180 static machine_mode
51181 ix86_preferred_simd_mode (machine_mode mode)
51189 return TARGET_AVX512BW ? V64QImode :
51190 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
51192 return TARGET_AVX512BW ? V32HImode :
51193 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
51195 return TARGET_AVX512F ? V16SImode :
51196 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
51198 return TARGET_AVX512F ? V8DImode :
51199 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
51202 if (TARGET_AVX512F)
51204 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51210 if (!TARGET_VECTORIZE_DOUBLE)
51212 else if (TARGET_AVX512F)
51214 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51216 else if (TARGET_SSE2)
/* NOTE(review): sampled listing -- braces missing.  Returns the bitmask
   of vector sizes (bytes) to try: 64|32|16 under AVX512F, 32|16 under
   AVX without -mprefer-avx128, else 0 (default size only).  */
51225 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
51226 vectors. If AVX512F is enabled then try vectorizing with 512bit,
51227 256bit and 128bit vectors. */
51229 static unsigned int
51230 ix86_autovectorize_vector_sizes (void)
51232 return TARGET_AVX512F ? 64 | 32 | 16 :
51233 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
/* NOTE(review): sampled listing -- return type, braces and the
   fall-through `return NO_REGS` are missing.  Visible logic: allow
   spilling SImode/DImode integer-class pseudos into SSE registers when
   SSE is on, the GENERAL_REGS_SSE_SPILL tuning is set and MMX is off.  */
51238 /* Return class of registers which could be used for pseudo of MODE
51239 and of class RCLASS for spilling instead of memory. Return NO_REGS
51240 if it is not possible or non-profitable. */
51242 ix86_spill_class (reg_class_t rclass, machine_mode mode)
51244 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
51245 && (mode == SImode || (TARGET_64BIT && mode == DImode))
51246 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
51247 return ALL_SSE_REGS;
/* NOTE(review): sampled listing -- return type, braces and the final
   `return cost` are missing.  Allocates the 3-slot cost accumulator
   (prologue/body/epilogue), zero-initialized.  */
51251 /* Implement targetm.vectorize.init_cost. */
51254 ix86_init_cost (struct loop *)
51256 unsigned *cost = XNEWVEC (unsigned, 3);
51257 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
/* NOTE(review): sampled listing -- return type, braces and the final
   `return retval` are missing.  Accumulates COUNT * per-stmt cost into
   the slot for WHERE; inner-loop body statements are weighted 50x, and
   integer statements on Silvermont/"intel" tuning are scaled by 1.7
   (17/10 in integer arithmetic) to model the in-order SIMD pipeline.  */
51261 /* Implement targetm.vectorize.add_stmt_cost. */
51264 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
51265 struct _stmt_vec_info *stmt_info, int misalign,
51266 enum vect_cost_model_location where)
51268 unsigned *cost = (unsigned *) data;
51269 unsigned retval = 0;
51271 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
51272 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
51274 /* Statements in an inner loop relative to the loop being
51275 vectorized are weighted more heavily. The value here is
51276 arbitrary and could potentially be improved with analysis. */
51277 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
51278 count *= 50; /* FIXME. */
51280 retval = (unsigned) (count * stmt_cost);
51282 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
51283 for Silvermont as it has out of order integer pipeline and can execute
51284 2 scalar instruction per tick, but has in order SIMD pipeline. */
51285 if (TARGET_SILVERMONT || TARGET_INTEL)
51286 if (stmt_info && stmt_info->stmt)
51288 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
51289 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
51290 retval = (retval * 17) / 10;
51293 cost[where] += retval;
/* NOTE(review): sampled listing -- return type and braces missing.
   Copies the three accumulated cost slots out to the caller.  */
51298 /* Implement targetm.vectorize.finish_cost. */
51301 ix86_finish_cost (void *data, unsigned *prologue_cost,
51302 unsigned *body_cost, unsigned *epilogue_cost)
51304 unsigned *cost = (unsigned *) data;
51305 *prologue_cost = cost[vect_prologue];
51306 *body_cost = cost[vect_body];
51307 *epilogue_cost = cost[vect_epilogue];
/* NOTE(review): sampled listing -- the function's return type and body
   (presumably freeing DATA -- confirm in full source) are missing.  */
51310 /* Implement targetm.vectorize.destroy_cost_data. */
51313 ix86_destroy_cost_data (void *data)
/* NOTE(review): sampled listing -- braces, the `bool strong`
   declaration line and the final `return val` are missing.  Validates
   the IX86_HLE_ACQUIRE/IX86_HLE_RELEASE bits against the base memory
   model, warning and falling back to SEQ_CST-based models on misuse.  */
51318 /* Validate target specific memory model bits in VAL. */
51320 static unsigned HOST_WIDE_INT
51321 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
51323 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
51326 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
51328 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
51330 warning (OPT_Winvalid_memory_model,
51331 "Unknown architecture specific memory model");
51332 return MEMMODEL_SEQ_CST;
51334 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
51335 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
51337 warning (OPT_Winvalid_memory_model,
51338 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
51339 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
51341 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
51343 warning (OPT_Winvalid_memory_model,
51344 "HLE_RELEASE not used with RELEASE or stronger memory model");
51345 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
/* NOTE(review): sampled listing -- return type, braces, switch case
   labels for supported modes and several returns are missing; code
   text left byte-identical.  Visible behavior: rejects simdlen outside
   [2,16] or non-power-of-two; warns on unsupported return/argument
   modes; picks vecsize_mangle 'b' (SSE2/cilk default), 'c' (AVX) or
   'd' (AVX2) -- for public functions all of "bcd" are emitted, indexed
   by NUM; sets vecsize_int/vecsize_float per mangle letter and derives
   a default simdlen from base-type bitsize, capped at 16.  */
51350 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
51351 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
51352 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
51353 or number of vecsize_mangle variants that should be emitted. */
51356 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
51357 struct cgraph_simd_clone *clonei,
51358 tree base_type, int num)
51362 if (clonei->simdlen
51363 && (clonei->simdlen < 2
51364 || clonei->simdlen > 16
51365 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
51367 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51368 "unsupported simdlen %d", clonei->simdlen);
51372 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
51373 if (TREE_CODE (ret_type) != VOID_TYPE)
51374 switch (TYPE_MODE (ret_type))
51386 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51387 "unsupported return type %qT for simd\n", ret_type);
51394 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
51395 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
51396 switch (TYPE_MODE (TREE_TYPE (t)))
51408 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51409 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
51413 if (clonei->cilk_elemental)
51415 /* Parse here processor clause. If not present, default to 'b'. */
51416 clonei->vecsize_mangle = 'b';
51418 else if (!TREE_PUBLIC (node->decl))
51420 /* If the function isn't exported, we can pick up just one ISA
51423 clonei->vecsize_mangle = 'd';
51424 else if (TARGET_AVX)
51425 clonei->vecsize_mangle = 'c';
51427 clonei->vecsize_mangle = 'b';
51432 clonei->vecsize_mangle = "bcd"[num];
51435 switch (clonei->vecsize_mangle)
51438 clonei->vecsize_int = 128;
51439 clonei->vecsize_float = 128;
51442 clonei->vecsize_int = 128;
51443 clonei->vecsize_float = 256;
51446 clonei->vecsize_int = 256;
51447 clonei->vecsize_float = 256;
51450 if (clonei->simdlen == 0)
51452 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
51453 clonei->simdlen = clonei->vecsize_int;
51455 clonei->simdlen = clonei->vecsize_float;
51456 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
51457 if (clonei->simdlen > 16)
51458 clonei->simdlen = 16;
/* NOTE(review): sampled listing -- the switch case labels assigning
   STR (per vecsize_mangle letter), the `if (!str) return`, and the
   gcc_assert on OK are missing.  Visible behavior: builds a target
   attribute string for the clone and re-selects the current function's
   target options via ix86_set_current_function.  */
51463 /* Add target attribute to SIMD clone NODE if needed. */
51466 ix86_simd_clone_adjust (struct cgraph_node *node)
51468 const char *str = NULL;
51469 gcc_assert (node->decl == cfun->decl);
51470 switch (node->simdclone->vecsize_mangle)
51485 gcc_unreachable ();
51490 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
51491 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
51494 ix86_reset_previous_fndecl ();
51495 ix86_set_current_function (node->decl);
/* NOTE(review): sampled listing -- case labels ('b', 'c', 'd') and
   some returns (including the -1 "unusable" paths) are missing.
   Visible behavior: badness depends on how well the clone's
   vecsize_mangle ISA matches the current TARGET_AVX2/AVX settings.  */
51498 /* If SIMD clone NODE can't be used in a vectorized loop
51499 in current function, return -1, otherwise return a badness of using it
51500 (0 if it is most desirable from vecsize_mangle point of view, 1
51501 slightly less desirable, etc.). */
51504 ix86_simd_clone_usable (struct cgraph_node *node)
51506 switch (node->simdclone->vecsize_mangle)
51513 return TARGET_AVX2 ? 2 : 1;
51517 return TARGET_AVX2 ? 1 : 0;
51524 gcc_unreachable ();
/* NOTE(review): sampled listing -- return type, several declarations
   (bbs, insn, i), the MEM_P test inside the subrtx walk, the
   free (bbs) and the final return are missing.  Visible behavior:
   counts memory references in the loop body and, when the
   ADJUST_UNROLL tuning applies and the count is <= 32, returns
   32/mem_count as the unroll factor.  */
51528 /* This function adjusts the unroll factor based on
51529 the hardware capabilities. For ex, bdver3 has
51530 a loop buffer which makes unrolling of smaller
51531 loops less important. This function decides the
51532 unroll factor using number of memory references
51533 (value 32 is used) as a heuristic. */
51536 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
51541 unsigned mem_count = 0;
51543 if (!TARGET_ADJUST_UNROLL)
51546 /* Count the number of memory references within the loop body.
51547 This value determines the unrolling factor for bdver3 and bdver4
51549 subrtx_iterator::array_type array;
51550 bbs = get_loop_body (loop);
51551 for (i = 0; i < loop->num_nodes; i++)
51552 FOR_BB_INSNS (bbs[i], insn)
51553 if (NONDEBUG_INSN_P (insn))
51554 FOR_EACH_SUBRTX (iter, array, insn, NONCONST)
51555 if (const_rtx x = *iter)
51558 machine_mode mode = GET_MODE (x);
51559 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
51567 if (mem_count && mem_count <=32)
51568 return 32/mem_count;
/* NOTE(review): sampled listing -- return type, braces and part of the
   explanatory comment are missing.  FP exception/rounding support is
   claimed whenever x87 or SSE math is available.  */
51574 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
51577 ix86_float_exceptions_rounding_supported_p (void)
51579 /* For x87 floating point with standard excess precision handling,
51580 there is no adddf3 pattern (since x87 floating point only has
51581 XFmode operations) so the default hook implementation gets this
51583 return TARGET_80387 || TARGET_SSE_MATH;
/* NOTE(review): sampled listing -- return type, braces, the early
   `return`, the `if (TARGET_80387)` guard around the x87 section and a
   few condition lines are missing; code text left byte-identical.
   Visible behavior: builds HOLD/CLEAR/UPDATE trees for C11
   atomic-compound-assignment FP environment handling -- x87 part uses
   the fnstenv/fnclex/fnstsw/fldenv builtins; SSE part saves MXCSR,
   masks exceptions (|0x1f80) and clears flags (&0xffffffc0), then on
   UPDATE re-reads MXCSR, merges exception bits into exceptions_var,
   restores the original MXCSR and raises via
   __atomic_feraiseexcept.  */
51586 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
51589 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
51591 if (!TARGET_80387 && !TARGET_SSE_MATH)
51593 tree exceptions_var = create_tmp_var (integer_type_node);
51596 tree fenv_index_type = build_index_type (size_int (6));
51597 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
51598 tree fenv_var = create_tmp_var (fenv_type);
51599 mark_addressable (fenv_var);
51600 tree fenv_ptr = build_pointer_type (fenv_type);
51601 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
51602 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
51603 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
51604 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
51605 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
51606 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
51607 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
51608 tree hold_fnclex = build_call_expr (fnclex, 0);
51609 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
51611 *clear = build_call_expr (fnclex, 0);
51612 tree sw_var = create_tmp_var (short_unsigned_type_node);
51613 tree fnstsw_call = build_call_expr (fnstsw, 0);
51614 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
51615 sw_var, fnstsw_call);
51616 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
51617 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
51618 exceptions_var, exceptions_x87);
51619 *update = build2 (COMPOUND_EXPR, integer_type_node,
51620 sw_mod, update_mod);
51621 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
51622 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
51624 if (TARGET_SSE_MATH)
51626 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node);
51627 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node);
51628 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
51629 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
51630 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
51631 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
51632 mxcsr_orig_var, stmxcsr_hold_call);
51633 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
51635 build_int_cst (unsigned_type_node, 0x1f80));
51636 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
51637 build_int_cst (unsigned_type_node, 0xffffffc0));
51638 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
51639 mxcsr_mod_var, hold_mod_val);
51640 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51641 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
51642 hold_assign_orig, hold_assign_mod);
51643 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
51644 ldmxcsr_hold_call);
51646 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
51649 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
51651 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
51652 ldmxcsr_clear_call);
51654 *clear = ldmxcsr_clear_call;
51655 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
51656 tree exceptions_sse = fold_convert (integer_type_node,
51657 stxmcsr_update_call);
51660 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
51661 exceptions_var, exceptions_sse);
51662 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
51663 exceptions_var, exceptions_mod);
51664 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
51665 exceptions_assign);
51668 *update = build2 (MODIFY_EXPR, integer_type_node,
51669 exceptions_var, exceptions_sse);
51670 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
51671 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51672 ldmxcsr_update_call);
51674 tree atomic_feraiseexcept
51675 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
51676 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
51677 1, exceptions_var);
51678 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
51679 atomic_feraiseexcept_call);
/* NOTE(review): sampled listing -- braces, the TARGET_MPX test and
   both return statements (BND mode vs. VOIDmode) are missing.
   Visible behavior: warns when -fcheck-pointer-bounds is used without
   MPX support.  */
51682 /* Return mode to be used for bounds or VOIDmode
51683 if bounds are not supported. */
51685 static enum machine_mode
51686 ix86_mpx_bound_mode ()
51688 /* Do not support pointer checker if MPX
51692 if (flag_check_pointer_bounds)
51693 warning (0, "Pointer Checker requires MPX support on this target."
51694 " Use -mmpx options to enable MPX.");
/* NOTE(review): sampled listing -- return type and braces missing.
   Builds the COMPLEX constant used to statically initialize MPX
   bounds; only the INIT (lb=0, ub=-1) and NONE (lb=-1, ub=0)
   combinations are accepted (asserted).  Note the upper bound is
   stored one's-complemented: ub==-1 yields a zero high part.  */
51701 /* Return constant used to statically initialize constant bounds.
51703 This function is used to create special bound values. For now
51704 only INIT bounds and NONE bounds are expected. More special
51705 values may be added later. */
51708 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
51710 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
51711 : build_zero_cst (pointer_sized_int_node);
51712 tree high = ub ? build_zero_cst (pointer_sized_int_node)
51713 : build_minus_one_cst (pointer_sized_int_node);
51715 /* This function is supposed to be used to create INIT and
51716 NONE bounds only. */
51717 gcc_assert ((lb == 0 && ub == -1)
51718 || (lb == -1 && ub == 0));
51720 return build_complex (NULL, low, high);
/* NOTE(review): sampled listing -- return type, braces and the final
   return (the statement count) are missing.  Appends to STMTS two
   pointer-sized stores initializing VAR: the low word with LB and the
   next word with the one's-complement of UB (matching the storage
   convention used by ix86_make_bounds_constant).  */
51723 /* Generate a list of statements STMTS to initialize pointer bounds
51724 variable VAR with bounds LB and UB. Return the number of generated
51728 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
51730 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
51731 tree lhs, modify, var_p;
51733 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
51734 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
51736 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
51737 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
51738 append_to_statement_list (modify, stmts);
51740 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
51741 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
51742 TYPE_SIZE_UNIT (pointer_sized_int_node)));
51743 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
51744 append_to_statement_list (modify, stmts);
51749 /* Initialize the GCC target structure. */
51750 #undef TARGET_RETURN_IN_MEMORY
51751 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
51753 #undef TARGET_LEGITIMIZE_ADDRESS
51754 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
51756 #undef TARGET_ATTRIBUTE_TABLE
51757 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
51758 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
51759 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
51760 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51761 # undef TARGET_MERGE_DECL_ATTRIBUTES
51762 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
51765 #undef TARGET_COMP_TYPE_ATTRIBUTES
51766 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
51768 #undef TARGET_INIT_BUILTINS
51769 #define TARGET_INIT_BUILTINS ix86_init_builtins
51770 #undef TARGET_BUILTIN_DECL
51771 #define TARGET_BUILTIN_DECL ix86_builtin_decl
51772 #undef TARGET_EXPAND_BUILTIN
51773 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
51775 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
51776 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
51777 ix86_builtin_vectorized_function
51779 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
51780 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
51782 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
51783 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
51785 #undef TARGET_VECTORIZE_BUILTIN_GATHER
51786 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
51788 #undef TARGET_BUILTIN_RECIPROCAL
51789 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
51791 #undef TARGET_ASM_FUNCTION_EPILOGUE
51792 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
51794 #undef TARGET_ENCODE_SECTION_INFO
51795 #ifndef SUBTARGET_ENCODE_SECTION_INFO
51796 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
51798 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
51801 #undef TARGET_ASM_OPEN_PAREN
51802 #define TARGET_ASM_OPEN_PAREN ""
51803 #undef TARGET_ASM_CLOSE_PAREN
51804 #define TARGET_ASM_CLOSE_PAREN ""
51806 #undef TARGET_ASM_BYTE_OP
51807 #define TARGET_ASM_BYTE_OP ASM_BYTE
51809 #undef TARGET_ASM_ALIGNED_HI_OP
51810 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
51811 #undef TARGET_ASM_ALIGNED_SI_OP
51812 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
51814 #undef TARGET_ASM_ALIGNED_DI_OP
51815 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
51818 #undef TARGET_PROFILE_BEFORE_PROLOGUE
51819 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
51821 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
51822 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
51824 #undef TARGET_ASM_UNALIGNED_HI_OP
51825 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
51826 #undef TARGET_ASM_UNALIGNED_SI_OP
51827 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
51828 #undef TARGET_ASM_UNALIGNED_DI_OP
51829 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
51831 #undef TARGET_PRINT_OPERAND
51832 #define TARGET_PRINT_OPERAND ix86_print_operand
51833 #undef TARGET_PRINT_OPERAND_ADDRESS
51834 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
51835 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
51836 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
51837 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
51838 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
51840 #undef TARGET_SCHED_INIT_GLOBAL
51841 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
51842 #undef TARGET_SCHED_ADJUST_COST
51843 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
51844 #undef TARGET_SCHED_ISSUE_RATE
51845 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
51846 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
51847 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
51848 ia32_multipass_dfa_lookahead
51849 #undef TARGET_SCHED_MACRO_FUSION_P
51850 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
51851 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
51852 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
51854 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
51855 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
51857 #undef TARGET_MEMMODEL_CHECK
51858 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
51860 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
51861 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
51864 #undef TARGET_HAVE_TLS
51865 #define TARGET_HAVE_TLS true
51867 #undef TARGET_CANNOT_FORCE_CONST_MEM
51868 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
51869 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
51870 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
51872 #undef TARGET_DELEGITIMIZE_ADDRESS
51873 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
51875 #undef TARGET_MS_BITFIELD_LAYOUT_P
51876 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
51879 #undef TARGET_BINDS_LOCAL_P
51880 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
51882 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
51883 #undef TARGET_BINDS_LOCAL_P
51884 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
51887 #undef TARGET_ASM_OUTPUT_MI_THUNK
51888 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
51889 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
51890 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
51892 #undef TARGET_ASM_FILE_START
51893 #define TARGET_ASM_FILE_START x86_file_start
51895 #undef TARGET_OPTION_OVERRIDE
51896 #define TARGET_OPTION_OVERRIDE ix86_option_override
51898 #undef TARGET_REGISTER_MOVE_COST
51899 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
51900 #undef TARGET_MEMORY_MOVE_COST
51901 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
51902 #undef TARGET_RTX_COSTS
51903 #define TARGET_RTX_COSTS ix86_rtx_costs
51904 #undef TARGET_ADDRESS_COST
51905 #define TARGET_ADDRESS_COST ix86_address_cost
51907 #undef TARGET_FIXED_CONDITION_CODE_REGS
51908 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
51909 #undef TARGET_CC_MODES_COMPATIBLE
51910 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
/* Machine-dependent reorg pass, builtin folding, and multiversioning
   dispatcher hooks.  */
51912 #undef TARGET_MACHINE_DEPENDENT_REORG
51913 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
51915 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
51916 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
51918 #undef TARGET_BUILD_BUILTIN_VA_LIST
51919 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
51921 #undef TARGET_FOLD_BUILTIN
51922 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
/* Function multiversioning (target_clones / "target" attribute).  */
51924 #undef TARGET_COMPARE_VERSION_PRIORITY
51925 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
51927 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
51928 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
51929 ix86_generate_version_dispatcher_body
51931 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
51932 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
51933 ix86_get_function_versions_dispatcher
/* va_list type handling (distinct 32/64-bit and ms/sysv ABIs).  */
51935 #undef TARGET_ENUM_VA_LIST_P
51936 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
51938 #undef TARGET_FN_ABI_VA_LIST
51939 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
51941 #undef TARGET_CANONICAL_VA_LIST_TYPE
51942 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
51944 #undef TARGET_EXPAND_BUILTIN_VA_START
51945 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
/* Calling-convention and argument-passing hooks: how arguments are
   assigned to registers/stack, varargs setup, stack realignment (DRAP),
   static chain, and trampolines.  */
51947 #undef TARGET_MD_ASM_CLOBBERS
51948 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
51950 #undef TARGET_PROMOTE_PROTOTYPES
51951 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
51952 #undef TARGET_SETUP_INCOMING_VARARGS
51953 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
51954 #undef TARGET_MUST_PASS_IN_STACK
51955 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
51956 #undef TARGET_FUNCTION_ARG_ADVANCE
51957 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
51958 #undef TARGET_FUNCTION_ARG
51959 #define TARGET_FUNCTION_ARG ix86_function_arg
/* PIC register setup (may be a pseudo before reload).  */
51960 #undef TARGET_INIT_PIC_REG
51961 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
51962 #undef TARGET_USE_PSEUDO_PIC_REG
51963 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
51964 #undef TARGET_FUNCTION_ARG_BOUNDARY
51965 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
51966 #undef TARGET_PASS_BY_REFERENCE
51967 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
51968 #undef TARGET_INTERNAL_ARG_POINTER
51969 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
/* Dynamic stack realignment support (DRAP register).  */
51970 #undef TARGET_UPDATE_STACK_BOUNDARY
51971 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
51972 #undef TARGET_GET_DRAP_RTX
51973 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
51974 #undef TARGET_STRICT_ARGUMENT_NAMING
51975 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
51976 #undef TARGET_STATIC_CHAIN
51977 #define TARGET_STATIC_CHAIN ix86_static_chain
51978 #undef TARGET_TRAMPOLINE_INIT
51979 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
51980 #undef TARGET_RETURN_POPS_ARGS
51981 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
/* Miscellaneous codegen hooks: combine validation, AddressSanitizer
   shadow offset, va_arg gimplification, and mode-support queries.  */
51983 #undef TARGET_LEGITIMATE_COMBINED_INSN
51984 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
51986 #undef TARGET_ASAN_SHADOW_OFFSET
51987 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
51989 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
51990 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
51992 #undef TARGET_SCALAR_MODE_SUPPORTED_P
51993 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
51995 #undef TARGET_VECTOR_MODE_SUPPORTED_P
51996 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
51998 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
51999 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
52000 ix86_libgcc_floating_mode_supported_p
/* Modes for constant suffixes such as 'w'/'q' (__float80/__float128).  */
52002 #undef TARGET_C_MODE_FOR_SUFFIX
52003 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
/* NOTE(review): this DWARF DTPREL hook is normally inside an #ifdef
   (HAVE_AS_TLS in the full file); the guard is not visible in this
   excerpt -- confirm before relying on unconditional definition.  */
52006 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
52007 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
/* Let subtargets (e.g. cygwin/mingw) add decl attributes.  */
52010 #ifdef SUBTARGET_INSERT_ATTRIBUTES
52011 #undef TARGET_INSERT_ATTRIBUTES
52012 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
52015 #undef TARGET_MANGLE_TYPE
52016 #define TARGET_MANGLE_TYPE ix86_mangle_type
52019 #undef TARGET_STACK_PROTECT_FAIL
52020 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
/* Function return-value hooks and register-allocator / reload class
   selection hooks.  */
52023 #undef TARGET_FUNCTION_VALUE
52024 #define TARGET_FUNCTION_VALUE ix86_function_value
52026 #undef TARGET_FUNCTION_VALUE_REGNO_P
52027 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
52029 #undef TARGET_PROMOTE_FUNCTION_MODE
52030 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
52032 #undef TARGET_MEMBER_TYPE_FORCES_BLK
52033 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
52035 #undef TARGET_INSTANTIATE_DECLS
52036 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
/* Reload / register-class hooks.  */
52038 #undef TARGET_SECONDARY_RELOAD
52039 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
52041 #undef TARGET_CLASS_MAX_NREGS
52042 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
52044 #undef TARGET_PREFERRED_RELOAD_CLASS
52045 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
52046 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
52047 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
52048 #undef TARGET_CLASS_LIKELY_SPILLED_P
52049 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
/* Auto-vectorizer hooks: cost model, constant-permute validation,
   preferred SIMD mode, and candidate vector sizes.  */
52051 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
52052 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
52053 ix86_builtin_vectorization_cost
52054 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
52055 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
52056 ix86_vectorize_vec_perm_const_ok
52057 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
52058 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
52059 ix86_preferred_simd_mode
52060 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
52061 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
52062 ix86_autovectorize_vector_sizes
/* Per-loop vectorization cost accounting (init/add/finish/destroy).  */
52063 #undef TARGET_VECTORIZE_INIT_COST
52064 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
52065 #undef TARGET_VECTORIZE_ADD_STMT_COST
52066 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
52067 #undef TARGET_VECTORIZE_FINISH_COST
52068 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
52069 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
52070 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
/* Per-function "target" attribute / option save-restore machinery,
   used by __attribute__((target(...))) and function multiversioning.  */
52072 #undef TARGET_SET_CURRENT_FUNCTION
52073 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
52075 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
52076 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
52078 #undef TARGET_OPTION_SAVE
52079 #define TARGET_OPTION_SAVE ix86_function_specific_save
52081 #undef TARGET_OPTION_RESTORE
52082 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
52084 #undef TARGET_OPTION_POST_STREAM_IN
52085 #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
52087 #undef TARGET_OPTION_PRINT
52088 #define TARGET_OPTION_PRINT ix86_function_specific_print
52090 #undef TARGET_OPTION_FUNCTION_VERSIONS
52091 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
52093 #undef TARGET_CAN_INLINE_P
52094 #define TARGET_CAN_INLINE_P ix86_can_inline_p
52096 #undef TARGET_EXPAND_TO_RTL_HOOK
52097 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
/* Address/constant legitimacy, LRA register-allocation tuning, frame
   pointer and register-elimination hooks.  */
52099 #undef TARGET_LEGITIMATE_ADDRESS_P
52100 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
/* i386 always uses LRA (the newer local register allocator).  */
52102 #undef TARGET_LRA_P
52103 #define TARGET_LRA_P hook_bool_void_true
52105 #undef TARGET_REGISTER_PRIORITY
52106 #define TARGET_REGISTER_PRIORITY ix86_register_priority
52108 #undef TARGET_REGISTER_USAGE_LEVELING_P
52109 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
52111 #undef TARGET_LEGITIMATE_CONSTANT_P
52112 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
52114 #undef TARGET_FRAME_POINTER_REQUIRED
52115 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
52117 #undef TARGET_CAN_ELIMINATE
52118 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
52120 #undef TARGET_EXTRA_LIVE_ON_ENTRY
52121 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
52123 #undef TARGET_ASM_CODE_END
52124 #define TARGET_ASM_CODE_END ix86_code_end
52126 #undef TARGET_CONDITIONAL_REGISTER_USAGE
52127 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
/* NOTE(review): darwin_rename_builtins is normally wrapped in
   #if TARGET_MACHO in the full file; the guard lines are not visible
   in this excerpt -- confirm before treating it as unconditional.  */
52130 #undef TARGET_INIT_LIBFUNCS
52131 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
52134 #undef TARGET_LOOP_UNROLL_ADJUST
52135 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
52137 #undef TARGET_SPILL_CLASS
52138 #define TARGET_SPILL_CLASS ix86_spill_class
/* OpenMP/Cilk "#pragma omp declare simd" clone support.  */
52140 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
52141 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
52142 ix86_simd_clone_compute_vecsize_and_simdlen
52144 #undef TARGET_SIMD_CLONE_ADJUST
52145 #define TARGET_SIMD_CLONE_ADJUST \
52146 ix86_simd_clone_adjust
52148 #undef TARGET_SIMD_CLONE_USABLE
52149 #define TARGET_SIMD_CLONE_USABLE \
52150 ix86_simd_clone_usable
52152 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
52153 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
52154 ix86_float_exceptions_rounding_supported_p
/* Mode-switching pass hooks (optimize_mode_switching), used on i386
   e.g. for x87/SSE rounding-mode and vzeroupper state transitions.  */
52156 #undef TARGET_MODE_EMIT
52157 #define TARGET_MODE_EMIT ix86_emit_mode_set
52159 #undef TARGET_MODE_NEEDED
52160 #define TARGET_MODE_NEEDED ix86_mode_needed
52162 #undef TARGET_MODE_AFTER
52163 #define TARGET_MODE_AFTER ix86_mode_after
52165 #undef TARGET_MODE_ENTRY
52166 #define TARGET_MODE_ENTRY ix86_mode_entry
52168 #undef TARGET_MODE_EXIT
52169 #define TARGET_MODE_EXIT ix86_mode_exit
52171 #undef TARGET_MODE_PRIORITY
52172 #define TARGET_MODE_PRIORITY ix86_mode_priority
52174 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
52175 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
/* Pointer Bounds Checker (chkp) hooks backed by Intel MPX: loading,
   storing and returning pointer bounds for checked code.  */
52177 #undef TARGET_LOAD_BOUNDS_FOR_ARG
52178 #define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds
52180 #undef TARGET_STORE_BOUNDS_FOR_ARG
52181 #define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds
52183 #undef TARGET_LOAD_RETURNED_BOUNDS
52184 #define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds
52186 #undef TARGET_STORE_RETURNED_BOUNDS
52187 #define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds
52189 #undef TARGET_CHKP_BOUND_MODE
52190 #define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode
52192 #undef TARGET_BUILTIN_CHKP_FUNCTION
52193 #define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function
52195 #undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
52196 #define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds
52198 #undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
52199 #define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant
52201 #undef TARGET_CHKP_INITIALIZE_BOUNDS
52202 #define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds
52204 #undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
52205 #define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds
/* Options forwarded to offload (accelerator) compilers.  */
52207 #undef TARGET_OFFLOAD_OPTIONS
52208 #define TARGET_OFFLOAD_OPTIONS \
52209 ix86_offload_options
/* 512 bits = widest (AVX-512 ZMM) alignment the target ever needs.  */
52211 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
52212 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
/* Instantiate the target hook vector: TARGET_INITIALIZER expands to an
   aggregate initializer built from all the TARGET_* macros defined
   above (defaults from target-def.h, overridden where redefined).  */
52214 struct gcc_target targetm = TARGET_INITIALIZER;
/* Garbage-collector root tables generated by gengtype for this file.  */
52216 #include "gt-i386.h"